From dc9a83e8be292834895ff658fee5b317df15dddb Mon Sep 17 00:00:00 2001 From: denver Date: Tue, 11 Mar 2025 17:45:41 -0500 Subject: [PATCH 01/13] docs: overhaul sphinx doc generation to depend on generated .rst files - resolve errors --- graphdoc/docs/.nojekyll | 1 + graphdoc/docs/conf.py | 49 ++++++ graphdoc/docs/generate_docs.py | 92 ++++++++++ graphdoc/docs/graphdoc.config.rst | 8 + ...aphdoc.data.dspy_data.dspy_data_helper.rst | 8 + ....data.dspy_data.generation_data_helper.rst | 8 + ...doc.data.dspy_data.quality_data_helper.rst | 8 + graphdoc/docs/graphdoc.data.dspy_data.rst | 37 +--- graphdoc/docs/graphdoc.data.helper.rst | 8 + graphdoc/docs/graphdoc.data.local.rst | 8 + ...oc.data.mlflow_data.mlflow_data_helper.rst | 8 + graphdoc/docs/graphdoc.data.mlflow_data.rst | 21 +-- graphdoc/docs/graphdoc.data.parser.rst | 8 + graphdoc/docs/graphdoc.data.rst | 50 ++---- graphdoc/docs/graphdoc.data.schema.rst | 8 + .../docs/graphdoc.eval.doc_generator_eval.rst | 8 + graphdoc/docs/graphdoc.eval.rst | 21 +-- graphdoc/docs/graphdoc.main.rst | 8 + .../graphdoc.modules.doc_generator_module.rst | 8 + graphdoc/docs/graphdoc.modules.rst | 21 +-- graphdoc/docs/graphdoc.prompts.rst | 37 +--- ...graphdoc.prompts.schema_doc_generation.rst | 8 + .../graphdoc.prompts.schema_doc_quality.rst | 8 + .../docs/graphdoc.prompts.single_prompt.rst | 8 + graphdoc/docs/graphdoc.rst | 32 ++-- .../graphdoc.train.doc_generator_trainer.rst | 8 + .../graphdoc.train.doc_quality_trainer.rst | 8 + graphdoc/docs/graphdoc.train.optimizers.rst | 8 + graphdoc/docs/graphdoc.train.rst | 46 ++--- .../graphdoc.train.single_prompt_trainer.rst | 8 + graphdoc/docs/index.rst | 12 +- graphdoc/docs/links.rst | 5 - graphdoc/docs/modules.rst | 1 + graphdoc/docs/source/graphdoc.data.rst | 5 + graphdoc/docs/source/graphdoc.prompts.rst | 4 + graphdoc/docs/source/graphdoc.rst | 2 + graphdoc/docs/source/modules.rst | 7 - graphdoc/docs/tests.conftest.rst | 8 + graphdoc/docs/tests.rst | 18 ++ 
graphdoc/docs/tests.test_confest.rst | 8 + graphdoc/docs/tests.test_config.rst | 8 + graphdoc/graphdoc/config.py | 116 ++++++------- .../data/dspy_data/dspy_data_helper.py | 5 +- .../data/dspy_data/generation_data_helper.py | 2 + .../data/dspy_data/quality_data_helper.py | 17 +- graphdoc/graphdoc/data/helper.py | 7 +- graphdoc/graphdoc/data/local.py | 41 +++-- .../data/mlflow_data/mlflow_data_helper.py | 27 +-- graphdoc/graphdoc/data/parser.py | 59 ++++--- graphdoc/graphdoc/data/schema.py | 30 +++- graphdoc/graphdoc/docs/conf.py | 34 ++++ .../graphdoc/modules/doc_generator_module.py | 24 ++- graphdoc/graphdoc/prompts/__init__.py | 17 +- .../graphdoc/prompts/schema_doc_generation.py | 71 +++----- .../graphdoc/prompts/schema_doc_quality.py | 91 +++++----- graphdoc/graphdoc/prompts/single_prompt.py | 56 +++--- graphdoc/graphdoc/run.sh | 164 ++++++++++++++++++ .../graphdoc/train/doc_generator_trainer.py | 42 +++-- .../graphdoc/train/doc_quality_trainer.py | 32 ++-- .../graphdoc/train/single_prompt_trainer.py | 35 ++-- graphdoc/run.sh | 10 +- run.sh | 21 +++ 62 files changed, 1027 insertions(+), 511 deletions(-) create mode 100644 graphdoc/docs/.nojekyll create mode 100755 graphdoc/docs/generate_docs.py create mode 100644 graphdoc/docs/graphdoc.config.rst create mode 100644 graphdoc/docs/graphdoc.data.dspy_data.dspy_data_helper.rst create mode 100644 graphdoc/docs/graphdoc.data.dspy_data.generation_data_helper.rst create mode 100644 graphdoc/docs/graphdoc.data.dspy_data.quality_data_helper.rst create mode 100644 graphdoc/docs/graphdoc.data.helper.rst create mode 100644 graphdoc/docs/graphdoc.data.local.rst create mode 100644 graphdoc/docs/graphdoc.data.mlflow_data.mlflow_data_helper.rst create mode 100644 graphdoc/docs/graphdoc.data.parser.rst create mode 100644 graphdoc/docs/graphdoc.data.schema.rst create mode 100644 graphdoc/docs/graphdoc.eval.doc_generator_eval.rst create mode 100644 graphdoc/docs/graphdoc.main.rst create mode 100644 
graphdoc/docs/graphdoc.modules.doc_generator_module.rst create mode 100644 graphdoc/docs/graphdoc.prompts.schema_doc_generation.rst create mode 100644 graphdoc/docs/graphdoc.prompts.schema_doc_quality.rst create mode 100644 graphdoc/docs/graphdoc.prompts.single_prompt.rst create mode 100644 graphdoc/docs/graphdoc.train.doc_generator_trainer.rst create mode 100644 graphdoc/docs/graphdoc.train.doc_quality_trainer.rst create mode 100644 graphdoc/docs/graphdoc.train.optimizers.rst create mode 100644 graphdoc/docs/graphdoc.train.single_prompt_trainer.rst delete mode 100644 graphdoc/docs/links.rst delete mode 100644 graphdoc/docs/source/modules.rst create mode 100644 graphdoc/docs/tests.conftest.rst create mode 100644 graphdoc/docs/tests.rst create mode 100644 graphdoc/docs/tests.test_confest.rst create mode 100644 graphdoc/docs/tests.test_config.rst create mode 100644 graphdoc/graphdoc/docs/conf.py create mode 100644 graphdoc/graphdoc/run.sh diff --git a/graphdoc/docs/.nojekyll b/graphdoc/docs/.nojekyll new file mode 100644 index 0000000..0519ecb --- /dev/null +++ b/graphdoc/docs/.nojekyll @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/graphdoc/docs/conf.py b/graphdoc/docs/conf.py index a1387e4..9da3d41 100644 --- a/graphdoc/docs/conf.py +++ b/graphdoc/docs/conf.py @@ -20,6 +20,7 @@ "sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx.ext.viewcode", + "sphinx.ext.intersphinx", # Add intersphinx for better cross-referencing ] # Add Napoleon settings for Google-style docstrings @@ -52,3 +53,51 @@ import os import sys sys.path.insert(0, os.path.abspath('..')) + +# -- Suppress specific warnings ----------------------------------------------- +# This suppresses specific warning types that we want to ignore +suppress_warnings = [ + 'autodoc.duplicate_object_description', +] + +# Configure autodoc settings to handle duplicate signatures +autodoc_default_options = { + 'members': True, + 'undoc-members': True, + 'show-inheritance': True, + 'member-order': 'bysource', 
+} + +# Configure intersphinx mapping for external projects +intersphinx_mapping = { + 'python': ('https://docs.python.org/3', None), +} + +# Ensure that objects are documented only once at their canonical location +canonical_module_mapping = {} + +def get_canonical_path(obj_module): + """Determines the canonical path for a module.""" + if obj_module in canonical_module_mapping: + return canonical_module_mapping[obj_module] + return obj_module + +def process_docstring(app, what, name, obj, options, lines): + """Process docstrings to add any needed directives or modify content.""" + # Add any processing here if needed + pass + +def process_signature(app, what, name, obj, options, signature, return_annotation): + """Process signatures to standardize them across the codebase.""" + # Add any processing here if needed + return (signature, return_annotation) + +# Configure nitpicky mode to be less strict +nitpicky = False + +def setup(app): + app.connect('autodoc-process-docstring', process_docstring) + app.connect('autodoc-process-signature', process_signature) + # Create static directory if it doesn't exist to avoid the warning + if not os.path.exists(os.path.join(os.path.dirname(__file__), '_static')): + os.makedirs(os.path.join(os.path.dirname(__file__), '_static')) diff --git a/graphdoc/docs/generate_docs.py b/graphdoc/docs/generate_docs.py new file mode 100755 index 0000000..43ef125 --- /dev/null +++ b/graphdoc/docs/generate_docs.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python +""" +Script to automatically generate Sphinx documentation RST files. +Run this script before building the documentation to ensure all RST files are up-to-date. 
+""" +import os +import subprocess +import shutil +import sys + +def main(): + # Get the directory where this script is located + docs_dir = os.path.dirname(os.path.abspath(__file__)) + + # The path to the module we want to document + module_dir = os.path.abspath(os.path.join(docs_dir, '..')) + + # Where to output the rst files + output_dir = docs_dir + + # Clean up existing RST files except for special ones + preserve_files = ['index.rst', 'conf.py', 'generate_docs.py'] + for filename in os.listdir(output_dir): + filepath = os.path.join(output_dir, filename) + if (filename.endswith('.rst') and filename not in preserve_files and + os.path.isfile(filepath)): + print(f"Removing {filepath}") + os.unlink(filepath) + + # Run sphinx-apidoc + subprocess.run([ + 'sphinx-apidoc', + '-f', # Force overwriting of existing files + '-e', # Put documentation for each module on its own page + '-M', # Put module documentation before submodule documentation + '-o', output_dir, # Output directory + module_dir, # Module directory + 'setup.py', # Exclude these files/patterns + '*tests*', + '*venv*', + '*docs*' + ]) + + # Add custom content to the module RST files + customize_rst_files(output_dir) + + print("\nRST files have been generated successfully!") + print("You can now build the documentation with: cd docs && make html") + +def customize_rst_files(output_dir): + """Add custom content to the RST files.""" + # Insert a :noindex: option after every :show-inheritance: in each RST file except index.rst + for filename in os.listdir(output_dir): + if filename.endswith('.rst') and filename != 'index.rst': + filepath = os.path.join(output_dir, filename) + with open(filepath, 'r') as f: + content = f.read() + + # Add noindex to submodules to prevent duplicates + content = content.replace( + ":show-inheritance:", + ":show-inheritance:\n :noindex:" + ) + + with open(filepath, 'w') as f: + f.write(content) + + # Create index.rst only if it doesn't already exist + index_path = os.path.join(output_dir, 'index.rst') + if not 
os.path.exists(index_path): + with open(index_path, 'w') as f: + f.write(""".. GraphDoc documentation master file + +Welcome to GraphDoc's documentation +================================== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + modules + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` +""") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/graphdoc/docs/graphdoc.config.rst b/graphdoc/docs/graphdoc.config.rst new file mode 100644 index 0000000..4379af5 --- /dev/null +++ b/graphdoc/docs/graphdoc.config.rst @@ -0,0 +1,8 @@ +graphdoc.config module +====================== + +.. automodule:: graphdoc.config + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.data.dspy_data.dspy_data_helper.rst b/graphdoc/docs/graphdoc.data.dspy_data.dspy_data_helper.rst new file mode 100644 index 0000000..86f9ac0 --- /dev/null +++ b/graphdoc/docs/graphdoc.data.dspy_data.dspy_data_helper.rst @@ -0,0 +1,8 @@ +graphdoc.data.dspy\_data.dspy\_data\_helper module +================================================== + +.. automodule:: graphdoc.data.dspy_data.dspy_data_helper + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.data.dspy_data.generation_data_helper.rst b/graphdoc/docs/graphdoc.data.dspy_data.generation_data_helper.rst new file mode 100644 index 0000000..0105afa --- /dev/null +++ b/graphdoc/docs/graphdoc.data.dspy_data.generation_data_helper.rst @@ -0,0 +1,8 @@ +graphdoc.data.dspy\_data.generation\_data\_helper module +======================================================== + +.. 
automodule:: graphdoc.data.dspy_data.generation_data_helper + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.data.dspy_data.quality_data_helper.rst b/graphdoc/docs/graphdoc.data.dspy_data.quality_data_helper.rst new file mode 100644 index 0000000..f594ba3 --- /dev/null +++ b/graphdoc/docs/graphdoc.data.dspy_data.quality_data_helper.rst @@ -0,0 +1,8 @@ +graphdoc.data.dspy\_data.quality\_data\_helper module +===================================================== + +.. automodule:: graphdoc.data.dspy_data.quality_data_helper + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.data.dspy_data.rst b/graphdoc/docs/graphdoc.data.dspy_data.rst index 75d31db..4173822 100644 --- a/graphdoc/docs/graphdoc.data.dspy_data.rst +++ b/graphdoc/docs/graphdoc.data.dspy_data.rst @@ -1,37 +1,18 @@ graphdoc.data.dspy\_data package ================================ -Submodules ----------- - -graphdoc.data.dspy\_data.dspy\_data\_helper module --------------------------------------------------- - -.. automodule:: graphdoc.data.dspy_data.dspy_data_helper - :members: - :undoc-members: - :show-inheritance: - -graphdoc.data.dspy\_data.generation\_data\_helper module --------------------------------------------------------- - -.. automodule:: graphdoc.data.dspy_data.generation_data_helper +.. automodule:: graphdoc.data.dspy_data :members: :undoc-members: :show-inheritance: + :noindex: -graphdoc.data.dspy\_data.quality\_data\_helper module ------------------------------------------------------ - -.. automodule:: graphdoc.data.dspy_data.quality_data_helper - :members: - :undoc-members: - :show-inheritance: +Submodules +---------- -Module contents ---------------- +.. toctree:: + :maxdepth: 4 -.. 
automodule:: graphdoc.data.dspy_data - :members: - :undoc-members: - :show-inheritance: + graphdoc.data.dspy_data.dspy_data_helper + graphdoc.data.dspy_data.generation_data_helper + graphdoc.data.dspy_data.quality_data_helper diff --git a/graphdoc/docs/graphdoc.data.helper.rst b/graphdoc/docs/graphdoc.data.helper.rst new file mode 100644 index 0000000..69f5dc6 --- /dev/null +++ b/graphdoc/docs/graphdoc.data.helper.rst @@ -0,0 +1,8 @@ +graphdoc.data.helper module +=========================== + +.. automodule:: graphdoc.data.helper + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.data.local.rst b/graphdoc/docs/graphdoc.data.local.rst new file mode 100644 index 0000000..a724702 --- /dev/null +++ b/graphdoc/docs/graphdoc.data.local.rst @@ -0,0 +1,8 @@ +graphdoc.data.local module +========================== + +.. automodule:: graphdoc.data.local + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.data.mlflow_data.mlflow_data_helper.rst b/graphdoc/docs/graphdoc.data.mlflow_data.mlflow_data_helper.rst new file mode 100644 index 0000000..4729e54 --- /dev/null +++ b/graphdoc/docs/graphdoc.data.mlflow_data.mlflow_data_helper.rst @@ -0,0 +1,8 @@ +graphdoc.data.mlflow\_data.mlflow\_data\_helper module +====================================================== + +.. automodule:: graphdoc.data.mlflow_data.mlflow_data_helper + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.data.mlflow_data.rst b/graphdoc/docs/graphdoc.data.mlflow_data.rst index 7505d93..71330b8 100644 --- a/graphdoc/docs/graphdoc.data.mlflow_data.rst +++ b/graphdoc/docs/graphdoc.data.mlflow_data.rst @@ -1,21 +1,16 @@ graphdoc.data.mlflow\_data package ================================== -Submodules ----------- - -graphdoc.data.mlflow\_data.mlflow\_data\_helper module ------------------------------------------------------- - -.. 
automodule:: graphdoc.data.mlflow_data.mlflow_data_helper +.. automodule:: graphdoc.data.mlflow_data :members: :undoc-members: :show-inheritance: + :noindex: + +Submodules +---------- -Module contents ---------------- +.. toctree:: + :maxdepth: 4 -.. automodule:: graphdoc.data.mlflow_data - :members: - :undoc-members: - :show-inheritance: + graphdoc.data.mlflow_data.mlflow_data_helper diff --git a/graphdoc/docs/graphdoc.data.parser.rst b/graphdoc/docs/graphdoc.data.parser.rst new file mode 100644 index 0000000..93dfdef --- /dev/null +++ b/graphdoc/docs/graphdoc.data.parser.rst @@ -0,0 +1,8 @@ +graphdoc.data.parser module +=========================== + +.. automodule:: graphdoc.data.parser + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.data.rst b/graphdoc/docs/graphdoc.data.rst index 9f059b8..ab4e00d 100644 --- a/graphdoc/docs/graphdoc.data.rst +++ b/graphdoc/docs/graphdoc.data.rst @@ -1,6 +1,12 @@ graphdoc.data package ===================== +.. automodule:: graphdoc.data + :members: + :undoc-members: + :show-inheritance: + :noindex: + Subpackages ----------- @@ -13,42 +19,10 @@ Subpackages Submodules ---------- -graphdoc.data.helper module ---------------------------- - -.. automodule:: graphdoc.data.helper - :members: - :undoc-members: - :show-inheritance: - -graphdoc.data.local module --------------------------- - -.. automodule:: graphdoc.data.local - :members: - :undoc-members: - :show-inheritance: - -graphdoc.data.parser module ---------------------------- - -.. automodule:: graphdoc.data.parser - :members: - :undoc-members: - :show-inheritance: - -graphdoc.data.schema module ---------------------------- - -.. automodule:: graphdoc.data.schema - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- +.. toctree:: + :maxdepth: 4 -.. 
automodule:: graphdoc.data - :members: - :undoc-members: - :show-inheritance: + graphdoc.data.helper + graphdoc.data.local + graphdoc.data.parser + graphdoc.data.schema diff --git a/graphdoc/docs/graphdoc.data.schema.rst b/graphdoc/docs/graphdoc.data.schema.rst new file mode 100644 index 0000000..6d1eeb4 --- /dev/null +++ b/graphdoc/docs/graphdoc.data.schema.rst @@ -0,0 +1,8 @@ +graphdoc.data.schema module +=========================== + +.. automodule:: graphdoc.data.schema + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.eval.doc_generator_eval.rst b/graphdoc/docs/graphdoc.eval.doc_generator_eval.rst new file mode 100644 index 0000000..46f1bcd --- /dev/null +++ b/graphdoc/docs/graphdoc.eval.doc_generator_eval.rst @@ -0,0 +1,8 @@ +graphdoc.eval.doc\_generator\_eval module +========================================= + +.. automodule:: graphdoc.eval.doc_generator_eval + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.eval.rst b/graphdoc/docs/graphdoc.eval.rst index 8b095e3..cdebde8 100644 --- a/graphdoc/docs/graphdoc.eval.rst +++ b/graphdoc/docs/graphdoc.eval.rst @@ -1,21 +1,16 @@ graphdoc.eval package ===================== -Submodules ----------- - -graphdoc.eval.doc\_generator\_eval module ------------------------------------------ - -.. automodule:: graphdoc.eval.doc_generator_eval +.. automodule:: graphdoc.eval :members: :undoc-members: :show-inheritance: + :noindex: + +Submodules +---------- -Module contents ---------------- +.. toctree:: + :maxdepth: 4 -.. automodule:: graphdoc.eval - :members: - :undoc-members: - :show-inheritance: + graphdoc.eval.doc_generator_eval diff --git a/graphdoc/docs/graphdoc.main.rst b/graphdoc/docs/graphdoc.main.rst new file mode 100644 index 0000000..7dc5c2f --- /dev/null +++ b/graphdoc/docs/graphdoc.main.rst @@ -0,0 +1,8 @@ +graphdoc.main module +==================== + +.. 
automodule:: graphdoc.main + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.modules.doc_generator_module.rst b/graphdoc/docs/graphdoc.modules.doc_generator_module.rst new file mode 100644 index 0000000..795ce3a --- /dev/null +++ b/graphdoc/docs/graphdoc.modules.doc_generator_module.rst @@ -0,0 +1,8 @@ +graphdoc.modules.doc\_generator\_module module +============================================== + +.. automodule:: graphdoc.modules.doc_generator_module + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.modules.rst b/graphdoc/docs/graphdoc.modules.rst index b8bfd19..3a3b65e 100644 --- a/graphdoc/docs/graphdoc.modules.rst +++ b/graphdoc/docs/graphdoc.modules.rst @@ -1,21 +1,16 @@ graphdoc.modules package ======================== -Submodules ----------- - -graphdoc.modules.doc\_generator\_module module ----------------------------------------------- - -.. automodule:: graphdoc.modules.doc_generator_module +.. automodule:: graphdoc.modules :members: :undoc-members: :show-inheritance: + :noindex: + +Submodules +---------- -Module contents ---------------- +.. toctree:: + :maxdepth: 4 -.. automodule:: graphdoc.modules - :members: - :undoc-members: - :show-inheritance: + graphdoc.modules.doc_generator_module diff --git a/graphdoc/docs/graphdoc.prompts.rst b/graphdoc/docs/graphdoc.prompts.rst index cf530c9..5ffc90f 100644 --- a/graphdoc/docs/graphdoc.prompts.rst +++ b/graphdoc/docs/graphdoc.prompts.rst @@ -1,37 +1,18 @@ graphdoc.prompts package ======================== -Submodules ----------- - -graphdoc.prompts.schema\_doc\_generation module ------------------------------------------------ - -.. automodule:: graphdoc.prompts.schema_doc_generation - :members: - :undoc-members: - :show-inheritance: - -graphdoc.prompts.schema\_doc\_quality module --------------------------------------------- - -.. automodule:: graphdoc.prompts.schema_doc_quality +.. 
automodule:: graphdoc.prompts :members: :undoc-members: :show-inheritance: + :noindex: -graphdoc.prompts.single\_prompt module --------------------------------------- - -.. automodule:: graphdoc.prompts.single_prompt - :members: - :undoc-members: - :show-inheritance: +Submodules +---------- -Module contents ---------------- +.. toctree:: + :maxdepth: 4 -.. automodule:: graphdoc.prompts - :members: - :undoc-members: - :show-inheritance: + graphdoc.prompts.schema_doc_generation + graphdoc.prompts.schema_doc_quality + graphdoc.prompts.single_prompt diff --git a/graphdoc/docs/graphdoc.prompts.schema_doc_generation.rst b/graphdoc/docs/graphdoc.prompts.schema_doc_generation.rst new file mode 100644 index 0000000..6c0b4bc --- /dev/null +++ b/graphdoc/docs/graphdoc.prompts.schema_doc_generation.rst @@ -0,0 +1,8 @@ +graphdoc.prompts.schema\_doc\_generation module +=============================================== + +.. automodule:: graphdoc.prompts.schema_doc_generation + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.prompts.schema_doc_quality.rst b/graphdoc/docs/graphdoc.prompts.schema_doc_quality.rst new file mode 100644 index 0000000..cecd62b --- /dev/null +++ b/graphdoc/docs/graphdoc.prompts.schema_doc_quality.rst @@ -0,0 +1,8 @@ +graphdoc.prompts.schema\_doc\_quality module +============================================ + +.. automodule:: graphdoc.prompts.schema_doc_quality + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.prompts.single_prompt.rst b/graphdoc/docs/graphdoc.prompts.single_prompt.rst new file mode 100644 index 0000000..1a614ef --- /dev/null +++ b/graphdoc/docs/graphdoc.prompts.single_prompt.rst @@ -0,0 +1,8 @@ +graphdoc.prompts.single\_prompt module +====================================== + +.. 
automodule:: graphdoc.prompts.single_prompt + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.rst b/graphdoc/docs/graphdoc.rst index 502daff..1425159 100644 --- a/graphdoc/docs/graphdoc.rst +++ b/graphdoc/docs/graphdoc.rst @@ -1,6 +1,12 @@ graphdoc package ================ +.. automodule:: graphdoc + :members: + :undoc-members: + :show-inheritance: + :noindex: + Subpackages ----------- @@ -16,26 +22,8 @@ Subpackages Submodules ---------- -graphdoc.config module ----------------------- - -.. automodule:: graphdoc.config - :members: - :undoc-members: - :show-inheritance: - -graphdoc.main module --------------------- - -.. automodule:: graphdoc.main - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- +.. toctree:: + :maxdepth: 4 -.. automodule:: graphdoc - :members: - :undoc-members: - :show-inheritance: + graphdoc.config + graphdoc.main diff --git a/graphdoc/docs/graphdoc.train.doc_generator_trainer.rst b/graphdoc/docs/graphdoc.train.doc_generator_trainer.rst new file mode 100644 index 0000000..d3d8ab9 --- /dev/null +++ b/graphdoc/docs/graphdoc.train.doc_generator_trainer.rst @@ -0,0 +1,8 @@ +graphdoc.train.doc\_generator\_trainer module +============================================= + +.. automodule:: graphdoc.train.doc_generator_trainer + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.train.doc_quality_trainer.rst b/graphdoc/docs/graphdoc.train.doc_quality_trainer.rst new file mode 100644 index 0000000..596f769 --- /dev/null +++ b/graphdoc/docs/graphdoc.train.doc_quality_trainer.rst @@ -0,0 +1,8 @@ +graphdoc.train.doc\_quality\_trainer module +=========================================== + +.. 
automodule:: graphdoc.train.doc_quality_trainer + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.train.optimizers.rst b/graphdoc/docs/graphdoc.train.optimizers.rst new file mode 100644 index 0000000..b99ad09 --- /dev/null +++ b/graphdoc/docs/graphdoc.train.optimizers.rst @@ -0,0 +1,8 @@ +graphdoc.train.optimizers module +================================ + +.. automodule:: graphdoc.train.optimizers + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.train.rst b/graphdoc/docs/graphdoc.train.rst index d6d3f32..f87cd03 100644 --- a/graphdoc/docs/graphdoc.train.rst +++ b/graphdoc/docs/graphdoc.train.rst @@ -1,45 +1,19 @@ graphdoc.train package ====================== -Submodules ----------- - -graphdoc.train.doc\_generator\_trainer module ---------------------------------------------- - -.. automodule:: graphdoc.train.doc_generator_trainer - :members: - :undoc-members: - :show-inheritance: - -graphdoc.train.doc\_quality\_trainer module -------------------------------------------- - -.. automodule:: graphdoc.train.doc_quality_trainer - :members: - :undoc-members: - :show-inheritance: - -graphdoc.train.optimizers module --------------------------------- - -.. automodule:: graphdoc.train.optimizers +.. automodule:: graphdoc.train :members: :undoc-members: :show-inheritance: + :noindex: -graphdoc.train.single\_prompt\_trainer module ---------------------------------------------- - -.. automodule:: graphdoc.train.single_prompt_trainer - :members: - :undoc-members: - :show-inheritance: +Submodules +---------- -Module contents ---------------- +.. toctree:: + :maxdepth: 4 -.. 
automodule:: graphdoc.train - :members: - :undoc-members: - :show-inheritance: + graphdoc.train.doc_generator_trainer + graphdoc.train.doc_quality_trainer + graphdoc.train.optimizers + graphdoc.train.single_prompt_trainer diff --git a/graphdoc/docs/graphdoc.train.single_prompt_trainer.rst b/graphdoc/docs/graphdoc.train.single_prompt_trainer.rst new file mode 100644 index 0000000..4587e46 --- /dev/null +++ b/graphdoc/docs/graphdoc.train.single_prompt_trainer.rst @@ -0,0 +1,8 @@ +graphdoc.train.single\_prompt\_trainer module +============================================= + +.. automodule:: graphdoc.train.single_prompt_trainer + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/index.rst b/graphdoc/docs/index.rst index 3db22af..3b6f486 100644 --- a/graphdoc/docs/index.rst +++ b/graphdoc/docs/index.rst @@ -6,15 +6,13 @@ GraphDoc documentation ====================== -.. include:: links.rst - -`GraphDoc` is a tool for generating GraphQL documentation given a GraphQL schema. It is coupled with tooling for interacting with -`MLflow `_ for tracking and logging. At the root of the `graphdoc `_ directory, -you will find a `mlflow-manager `_ directory that can help you setup and manage a local MLflow server. Within the `graphdoc` directory, you will -find a `run.sh `_ script that can help you get started with a majority of common workflows that you may want to run. Just reach open an issue, or +GraphDoc is a tool for generating GraphQL documentation given a GraphQL schema. It is coupled with tooling for interacting with +MLflow for tracking and logging. At the root of the graphdoc directory, +you will find a mlflow-manager directory that can help you set up and manage a local MLflow server. Within the graphdoc directory, you will +find a run.sh script that can help you get started with a majority of common workflows that you may want to run. Just reach out, open an issue, or submit a PR, if you have any questions or feedback. Thanks! 
-License: `Apache License 2.0 `_ +License: Apache License 2.0 .. toctree:: diff --git a/graphdoc/docs/links.rst b/graphdoc/docs/links.rst deleted file mode 100644 index e796213..0000000 --- a/graphdoc/docs/links.rst +++ /dev/null @@ -1,5 +0,0 @@ -.. _mlflow: https://mlflow.org/ -.. _graphdoc_repo: https://github.com/semiotic-ai/graphdoc -.. _mlflow_manager: https://github.com/semiotic-ai/graphdoc/tree/main/mlflow-manager -.. _run_script: https://github.com/semiotic-ai/graphdoc/blob/main/run.sh -.. _Apache License 2.0: https://www.apache.org/licenses/LICENSE-2.0 \ No newline at end of file diff --git a/graphdoc/docs/modules.rst b/graphdoc/docs/modules.rst index 06251c7..6a8e6e1 100644 --- a/graphdoc/docs/modules.rst +++ b/graphdoc/docs/modules.rst @@ -5,3 +5,4 @@ graphdoc :maxdepth: 4 graphdoc + tests diff --git a/graphdoc/docs/source/graphdoc.data.rst b/graphdoc/docs/source/graphdoc.data.rst index 9d5cba5..bebca7d 100644 --- a/graphdoc/docs/source/graphdoc.data.rst +++ b/graphdoc/docs/source/graphdoc.data.rst @@ -11,6 +11,7 @@ graphdoc.data.helper module :members: :undoc-members: :show-inheritance: + :no-index: graphdoc.data.local module -------------------------- @@ -19,6 +20,7 @@ graphdoc.data.local module :members: :undoc-members: :show-inheritance: + :no-index: graphdoc.data.parser module --------------------------- @@ -27,6 +29,7 @@ graphdoc.data.parser module :members: :undoc-members: :show-inheritance: + :no-index: graphdoc.data.schema module --------------------------- @@ -35,6 +38,7 @@ graphdoc.data.schema module :members: :undoc-members: :show-inheritance: + :no-index: Module contents --------------- @@ -43,3 +47,4 @@ Module contents :members: :undoc-members: :show-inheritance: + :no-index: diff --git a/graphdoc/docs/source/graphdoc.prompts.rst b/graphdoc/docs/source/graphdoc.prompts.rst index cf530c9..e6308e0 100644 --- a/graphdoc/docs/source/graphdoc.prompts.rst +++ b/graphdoc/docs/source/graphdoc.prompts.rst @@ -11,6 +11,7 @@ 
graphdoc.prompts.schema\_doc\_generation module :members: :undoc-members: :show-inheritance: + :no-index: graphdoc.prompts.schema\_doc\_quality module -------------------------------------------- @@ -19,6 +20,7 @@ graphdoc.prompts.schema\_doc\_quality module :members: :undoc-members: :show-inheritance: + :no-index: graphdoc.prompts.single\_prompt module -------------------------------------- @@ -27,6 +29,7 @@ graphdoc.prompts.single\_prompt module :members: :undoc-members: :show-inheritance: + :no-index: Module contents --------------- @@ -35,3 +38,4 @@ Module contents :members: :undoc-members: :show-inheritance: + :no-index: diff --git a/graphdoc/docs/source/graphdoc.rst b/graphdoc/docs/source/graphdoc.rst index aae48c1..bd10c9e 100644 --- a/graphdoc/docs/source/graphdoc.rst +++ b/graphdoc/docs/source/graphdoc.rst @@ -20,6 +20,7 @@ graphdoc.main module :members: :undoc-members: :show-inheritance: + :no-index: Module contents --------------- @@ -28,3 +29,4 @@ Module contents :members: :undoc-members: :show-inheritance: + :no-index: diff --git a/graphdoc/docs/source/modules.rst b/graphdoc/docs/source/modules.rst deleted file mode 100644 index 06251c7..0000000 --- a/graphdoc/docs/source/modules.rst +++ /dev/null @@ -1,7 +0,0 @@ -graphdoc -======== - -.. toctree:: - :maxdepth: 4 - - graphdoc diff --git a/graphdoc/docs/tests.conftest.rst b/graphdoc/docs/tests.conftest.rst new file mode 100644 index 0000000..6755d2b --- /dev/null +++ b/graphdoc/docs/tests.conftest.rst @@ -0,0 +1,8 @@ +tests.conftest module +===================== + +.. automodule:: tests.conftest + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/tests.rst b/graphdoc/docs/tests.rst new file mode 100644 index 0000000..c201516 --- /dev/null +++ b/graphdoc/docs/tests.rst @@ -0,0 +1,18 @@ +tests package +============= + +.. automodule:: tests + :members: + :undoc-members: + :show-inheritance: + :noindex: + +Submodules +---------- + +.. 
toctree:: + :maxdepth: 4 + + tests.conftest + tests.test_confest + tests.test_config diff --git a/graphdoc/docs/tests.test_confest.rst b/graphdoc/docs/tests.test_confest.rst new file mode 100644 index 0000000..c13b703 --- /dev/null +++ b/graphdoc/docs/tests.test_confest.rst @@ -0,0 +1,8 @@ +tests.test\_confest module +========================== + +.. automodule:: tests.test_confest + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/tests.test_config.rst b/graphdoc/docs/tests.test_config.rst new file mode 100644 index 0000000..97c1aa6 --- /dev/null +++ b/graphdoc/docs/tests.test_config.rst @@ -0,0 +1,8 @@ +tests.test\_config module +========================= + +.. automodule:: tests.test_config + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/graphdoc/config.py b/graphdoc/graphdoc/config.py index 6ac20be..07d055b 100644 --- a/graphdoc/graphdoc/config.py +++ b/graphdoc/graphdoc/config.py @@ -35,23 +35,31 @@ def mlflow_data_helper_from_dict(mlflow_config: dict) -> MlflowDataHelper: - """Load a mlflow data helper from a dictionary of parameters. + """Load a MLflow data helper from a dictionary of parameters. - :param mlflow_config: Dictionary containing mlflow parameters. - :type mlflow_config: dict + The following keys are expected: + - mlflow_tracking_uri + - mlflow_tracking_username (optional) + - mlflow_tracking_password (optional) + + .. code-block:: python - .. code-block:: json { "mlflow_tracking_uri": "http://localhost:5000", "mlflow_tracking_username": "admin", "mlflow_tracking_password": "password" } + :param mlflow_config: Dictionary containing MLflow parameters. + :type mlflow_config: dict + :return: A MlflowDataHelper object. 
+ :rtype: MlflowDataHelper + """ return MlflowDataHelper( mlflow_tracking_uri=mlflow_config["mlflow_tracking_uri"], - mlflow_tracking_username=mlflow_config["mlflow_tracking_username"], - mlflow_tracking_password=mlflow_config["mlflow_tracking_password"], + mlflow_tracking_username=mlflow_config.get("mlflow_tracking_username", None), + mlflow_tracking_password=mlflow_config.get("mlflow_tracking_password", None), ) @@ -62,6 +70,7 @@ def mlflow_data_helper_from_yaml(yaml_path: Union[str, Path]) -> MlflowDataHelpe :type yaml_path: Union[str, Path] .. code-block:: yaml + mlflow: mlflow_tracking_uri: !env MLFLOW_TRACKING_URI # The tracking URI for MLflow mlflow_tracking_username: !env MLFLOW_TRACKING_USERNAME # The username for the mlflow tracking server @@ -81,6 +90,7 @@ def trainset_from_dict(trainset_dict: dict) -> List[dspy.Example]: """Load a trainset from a dictionary of parameters. .. code-block:: yaml + { "hf_api_key": !env HF_DATASET_KEY, # Must be a valid Hugging # Face API key @@ -151,6 +161,7 @@ def trainset_from_yaml(yaml_path: Union[str, Path]) -> List[dspy.Example]: """Load a trainset from a YAML file. .. code-block:: yaml + data: hf_api_key: !env HF_DATASET_KEY # Must be a valid Hugging Face API key # (with permission to access graphdoc) @@ -189,10 +200,12 @@ def split_trainset( ) -> tuple[List[dspy.Example], List[dspy.Example]]: """Split a trainset into a trainset and evalset. - :param trainset: The trainset to split. :type trainset: List[dspy.Example] - :param evalset_ratio: The proportionate size of the evalset. :type - evalset_ratio: float :return: A tuple of trainset and evalset. :rtype: - tuple[List[dspy.Example], List[dspy.Example]] + :param trainset: The trainset to split. + :type trainset: List[dspy.Example] + :param evalset_ratio: The proportionate size of the evalset. + :type evalset_ratio: float + :return: A tuple of trainset and evalset. 
+ :rtype: tuple[List[dspy.Example], List[dspy.Example]] """ random.seed(seed) @@ -209,6 +222,7 @@ def trainset_and_evalset_from_yaml( """Load a trainset and evalset from a YAML file. .. code-block:: yaml + data: hf_api_key: !env HF_DATASET_KEY # Must be a valid Hugging Face API key # (with permission to access graphdoc) @@ -253,28 +267,25 @@ def single_prompt_from_dict( ) -> SinglePrompt: """Load a single prompt from a dictionary of parameters. - .. code-block:: json + .. code-block:: python + { "prompt": "doc_quality", # Which prompt signature to use "class": "SchemaDocQualityPrompt", # Must be a child of SinglePrompt - "type": "predict", # The type of prompt to use - # (predict, chain_of_thought) - "metric": "rating", # The type of metric to use - # (rating, category) - "load_from_mlflow": false, # Whether to load the prompt from an MLFlow URI + "type": "predict", # Must be one of predict, chain_of_thought + "metric": "rating", # The metric to use for evaluation + "load_from_mlflow": false, # Whether to load the prompt from MLflow "model_uri": null, # The tracking URI for MLflow "model_name": null, # The name of the model in MLflow "model_version": null # The version of the model in MLflow - "prompt_metric": False # Whether another prompt is used - # to calculate the metric - # (in which case we must also load that prompt) } - :param prompt_dict: Dictionary containing prompt information. + :param prompt_dict: Dictionary containing prompt parameters. :type prompt_dict: dict - :param prompt_metric: The metric to use to calculate the metric. - Can be another prompt signature or a string. + :param prompt_metric: The metric to use: a metric name or another prompt used to compute it. :type prompt_metric: Union[str, SinglePrompt] + :param mlflow_dict: Dictionary containing MLflow parameters. + :type mlflow_dict: Optional[dict] :return: A SinglePrompt object. :rtype: SinglePrompt @@ -307,6 +318,7 @@ def single_prompt_from_yaml(yaml_path: Union[str, Path]) -> SinglePrompt: """Load a single prompt from a YAML file. 
.. code-block:: yaml + prompt: prompt: base_doc_gen # Which prompt signature to use class: DocGeneratorPrompt # Must be a child of SinglePrompt @@ -367,7 +379,8 @@ def single_trainer_from_dict( ) -> SinglePromptTrainer: """Load a single trainer from a dictionary of parameters. - .. code-block:: json + .. code-block:: python + { "trainer": { "class": "DocQualityTrainer", @@ -418,6 +431,7 @@ def single_trainer_from_yaml(yaml_path: Union[str, Path]) -> SinglePromptTrainer """Load a single prompt trainer from a YAML file. .. code-block:: yaml + trainer: hf_api_key: !env HF_DATASET_KEY # Must be a valid Hugging Face API key # (with permission to access graphdoc) @@ -450,21 +464,6 @@ def single_trainer_from_yaml(yaml_path: Union[str, Path]) -> SinglePromptTrainer model_version: null # The version of the model in MLflow prompt_metric: true # Whether another prompt is used # to calculate the metric - # (in which case we must load prompt) - - prompt_metric: - prompt: doc_quality # The prompt to use to calculate the metric - class: DocQualityPrompt # The class of the prompt to use - # to calculate the metric - type: predict # The type of prompt to use - # to calculate the metric - metric: rating # The metric to use to calculate - # the metric - load_from_mlflow: false # Whether to load the prompt - # from an MLFlow URI - model_uri: null # The tracking URI for MLflow - model_name: null # The name of the model in MLflow - model_version: null # The version of the model in MLflow :param yaml_path: Path to the YAML file. :type yaml_path: Union[str, Path] @@ -488,9 +487,10 @@ def single_trainer_from_yaml(yaml_path: Union[str, Path]) -> SinglePromptTrainer def doc_generator_module_from_dict( module_dict: dict, prompt: Union[DocGeneratorPrompt, SinglePrompt] ) -> DocGeneratorModule: - """Load a doc generator module from a dictionary of parameters. + """Load a single doc generator module from a dictionary of parameters. + + .. code-block:: python - .. 
code-block:: json { "retry": true, "retry_limit": 1, @@ -519,6 +519,7 @@ def doc_generator_module_from_yaml(yaml_path: Union[str, Path]) -> DocGeneratorM """Load a doc generator module from a YAML file. .. code-block:: yaml + prompt: prompt: base_doc_gen # Which prompt signature to use class: DocGeneratorPrompt # Must be a child of SinglePrompt @@ -575,6 +576,7 @@ def doc_generator_eval_from_yaml(yaml_path: Union[str, Path]) -> DocGeneratorEva """Load a doc generator evaluator from a YAML file. .. code-block:: yaml + mlflow: mlflow_tracking_uri: !env MLFLOW_TRACKING_URI # The tracking URI for MLflow mlflow_tracking_username: !env MLFLOW_TRACKING_USERNAME # The username for the mlflow tracking server @@ -620,33 +622,13 @@ def doc_generator_eval_from_yaml(yaml_path: Union[str, Path]) -> DocGeneratorEva :rtype: DocGeneratorEvaluator """ - # load the generator - generator = doc_generator_module_from_yaml(yaml_path) config = load_yaml_config(yaml_path) - - # load the evaluator - metric_config = config["prompt_metric"] - evaluator = single_prompt_from_dict(metric_config, metric_config["metric"]) - - # load the eval config - mdh = mlflow_data_helper_from_yaml(yaml_path) # noqa: F841 - mlflow_tracking_uri = config["mlflow"]["mlflow_tracking_uri"] - mlflow_experiment_name = config["eval"]["mlflow_experiment_name"] - generator_prediction_field = config["eval"]["generator_prediction_field"] - evaluator_prediction_field = config["eval"]["evaluator_prediction_field"] - readable_value = config["eval"]["readable_value"] - - # load the evalset - evalset = trainset_from_yaml(yaml_path) - - # return the evaluator + module = doc_generator_module_from_yaml(yaml_path) return DocGeneratorEvaluator( - generator=generator, - evaluator=evaluator, - evalset=evalset, - mlflow_tracking_uri=mlflow_tracking_uri, - mlflow_experiment_name=mlflow_experiment_name, - generator_prediction_field=generator_prediction_field, - evaluator_prediction_field=evaluator_prediction_field, - 
readable_value=readable_value, + tracking_uri=config["eval"]["mlflow_tracking_uri"], + experiment_name=config["eval"]["mlflow_experiment_name"], + module=module, + generator_prediction_field=config["eval"]["generator_prediction_field"], + evaluator_prediction_field=config["eval"]["evaluator_prediction_field"], + readable_value=config["eval"]["readable_value"], ) diff --git a/graphdoc/graphdoc/data/dspy_data/dspy_data_helper.py b/graphdoc/graphdoc/data/dspy_data/dspy_data_helper.py index 75aeb6d..874141d 100644 --- a/graphdoc/graphdoc/data/dspy_data/dspy_data_helper.py +++ b/graphdoc/graphdoc/data/dspy_data/dspy_data_helper.py @@ -20,7 +20,10 @@ class DspyDataHelper(ABC): - """Abstract class for creating data objects related to a given dspy.Signature.""" + """Abstract class for creating data objects related to a given dspy.Signature. + + :no-index: + """ ####################### # Class Methods # diff --git a/graphdoc/graphdoc/data/dspy_data/generation_data_helper.py b/graphdoc/graphdoc/data/dspy_data/generation_data_helper.py index b261d5f..482147d 100644 --- a/graphdoc/graphdoc/data/dspy_data/generation_data_helper.py +++ b/graphdoc/graphdoc/data/dspy_data/generation_data_helper.py @@ -20,6 +20,8 @@ class GenerationDataHelper(DspyDataHelper): """A helper class for creating data objects related to our Documentation Generation dspy.Signature. + + :no-index: The example signature is defined as: ``` diff --git a/graphdoc/graphdoc/data/dspy_data/quality_data_helper.py b/graphdoc/graphdoc/data/dspy_data/quality_data_helper.py index 3625faa..78c0273 100644 --- a/graphdoc/graphdoc/data/dspy_data/quality_data_helper.py +++ b/graphdoc/graphdoc/data/dspy_data/quality_data_helper.py @@ -20,15 +20,18 @@ class QualityDataHelper(DspyDataHelper): """A helper class for creating data objects related to our Documentation Quality dspy.Signature. 
+ + :no-index: The example signature is defined as: - ``` - database_schema: str = dspy.InputField() - category: Literal["perfect", "almost perfect", "poor but correct", "incorrect"] = ( - dspy.OutputField() - ) - rating: Literal[4, 3, 2, 1] = dspy.OutputField() - ``` + + .. code-block:: python + + database_schema: str = dspy.InputField() + category: Literal["perfect", "almost perfect", "poor but correct", "incorrect"] = ( + dspy.OutputField() + ) + rating: Literal[4, 3, 2, 1] = dspy.OutputField() """ diff --git a/graphdoc/graphdoc/data/helper.py b/graphdoc/graphdoc/data/helper.py index 2c3887d..00f3a7b 100644 --- a/graphdoc/graphdoc/data/helper.py +++ b/graphdoc/graphdoc/data/helper.py @@ -39,8 +39,11 @@ def check_directory_path(directory_path: Union[str, Path]) -> None: def check_file_path(file_path: Union[str, Path]) -> None: """Check if the provided path resolves to a valid file. - :param file_path: The path to check. :type file_path: Union[str, Path] :raises - ValueError: If the path does not resolve to a valid file. :return: None :rtype: None + :param file_path: The path to check. + :type file_path: Union[str, Path] + :raises ValueError: If the path does not resolve to a valid file. + :return: None + :rtype: None """ _file_path = Path(file_path).resolve() diff --git a/graphdoc/graphdoc/data/local.py b/graphdoc/graphdoc/data/local.py index 4072177..d5209d2 100644 --- a/graphdoc/graphdoc/data/local.py +++ b/graphdoc/graphdoc/data/local.py @@ -30,6 +30,8 @@ # check out how pytorch etc. handles loading in something like imagenet class LocalDataHelper: """A helper class for loading data from a directory. + + :no-index: :param schema_directory_path: The path to the directory containing the schemas :type schema_directory_path: Union[str, Path] Defaults to the path to the schemas in @@ -68,10 +70,14 @@ def schema_objects_from_folder( ) -> dict[str, SchemaObject]: """Load schemas from a folder, keeping the difficulty tag. 
- :param folder_path: The path to the folder containing the schemas :type - folder_path: Union[str, Path] :param category: The category of the schemas :type - category: str :param rating: The rating of the schemas :type rating: int - :return: A dictionary of schemas :rtype: dict[str, SchemaObject] + :param category: The category of the schemas + :type category: str + :param rating: The rating of the schemas + :type rating: int + :param folder_path: The path to the folder containing the schemas + :type folder_path: Union[str, Path] + :return: A dictionary of schemas + :rtype: dict[str, SchemaObject] """ check_directory_path(folder_path) @@ -147,11 +153,16 @@ def folder_to_dataset( ) -> Dataset: """Load a folder of schemas, keeping the difficulty tag. - :param category: The category of the schemas :type category: str :param - folder_path: The path to the folder containing the schemas :type folder_path: - Union[str, Path] :param parse_objects: Whether to parse the objects from the - schemas :type parse_objects: bool :param type_mapping: A dictionary mapping - types to strings :type type_mapping: Optional[dict[type, str]] + :param category: The category of the schemas + :type category: str + :param folder_path: The path to the folder containing the schemas + :type folder_path: Union[str, Path] + :param parse_objects: Whether to parse the objects from the schemas + :type parse_objects: bool + :param type_mapping: A dictionary mapping types to strings + :type type_mapping: Optional[dict[type, str]] + :return: A dataset containing the schemas + :rtype: Dataset """ objects = [] @@ -183,10 +194,14 @@ def folder_of_folders_to_dataset( """Load a folder of folders containing schemas, keeping the difficulty tag. :param folder_paths: Enum class defining folder paths, defaults to - SchemaCategoryPath. Must have a get_path method. 
:type folder_paths: Type[Enum] - :param parse_objects: Whether to parse the objects from the schemas :type - parse_objects: bool :param type_mapping: A dictionary mapping graphql-ast node - values to strings + SchemaCategoryPath. Must have a get_path method. + :type folder_paths: Type[Enum] + :param parse_objects: Whether to parse the objects from the schemas + :type parse_objects: bool + :param type_mapping: A dictionary mapping graphql-ast node values to strings + :type type_mapping: Optional[dict[type, str]] + :return: A dataset containing the schemas + :rtype: Dataset """ schema_objects = self.schema_objects_from_folder_of_folders( diff --git a/graphdoc/graphdoc/data/mlflow_data/mlflow_data_helper.py b/graphdoc/graphdoc/data/mlflow_data/mlflow_data_helper.py index 161e989..1016999 100644 --- a/graphdoc/graphdoc/data/mlflow_data/mlflow_data_helper.py +++ b/graphdoc/graphdoc/data/mlflow_data/mlflow_data_helper.py @@ -29,12 +29,15 @@ def __init__( mlflow_tracking_password: Optional[str] = None, ): """A helper class for loading and saving models and metadata from mlflow. + + :no-index: - :param mlflow_tracking_uri: The uri of the mlflow tracking server. :type - mlflow_tracking_uri: Union[str, Path] :param mlflow_tracking_username: The - username for the mlflow tracking server. :type mlflow_tracking_username: str + :param mlflow_tracking_uri: The uri of the mlflow tracking server. + :type mlflow_tracking_uri: Union[str, Path] + :param mlflow_tracking_username: The username for the mlflow tracking server. + :type mlflow_tracking_username: Optional[str] :param mlflow_tracking_password: The password for the mlflow tracking server. - :type mlflow_tracking_password: str + :type mlflow_tracking_password: Optional[str] """ self.mlflow_tracking_uri = mlflow_tracking_uri @@ -90,8 +93,10 @@ def latest_model_version(self, model_name: str): def model_by_name_and_version(self, model_name: str, model_version: str): """Load a model from mlflow by name and version. 
- :param model_name: The name of the model to load. :type model_name: str :param - model_version: The version of the model to load. :type model_version: str + :param model_name: The name of the model to load. + :type model_name: str + :param model_version: The version of the model to load. + :type model_version: str :return: The loaded model. """ @@ -139,10 +144,12 @@ def save_model( ): """Save a model to mlflow. - :param model: The model to save. :type model: dspy.Signature :param - model_signature: The signature of the model. :type model_signature: - ModelSignature :param model_name: The name of the model to save. :type - model_name: str + :param model: The model to save. + :type model: dspy.Signature + :param model_signature: The signature of the model. + :type model_signature: ModelSignature + :param model_name: The name of the model to save. + :type model_name: str """ mlflow.dspy.log_model( diff --git a/graphdoc/graphdoc/data/parser.py b/graphdoc/graphdoc/data/parser.py index 2a53a71..e5ec0fb 100644 --- a/graphdoc/graphdoc/data/parser.py +++ b/graphdoc/graphdoc/data/parser.py @@ -30,7 +30,10 @@ class Parser: - """A class for parsing and handling of GraphQL objects.""" + """A class for parsing and handling of GraphQL objects. + + :no-index: + """ DEFAULT_NODE_TYPES = { DocumentNode: "full schema", @@ -48,9 +51,12 @@ def _check_node_type( ) -> str: """Check the type of a schema node. - :param node: The schema node to check :type node: Node :param type_mapping: - Custom mapping of node types to strings. Defaults to DEFAULT_NODE_TYPES :type - type_mapping: Optional[dict[type, str]] :return: The type of the schema node + :param node: The schema node to check + :type node: Node + :param type_mapping: Custom mapping of node types to strings. 
Defaults to + DEFAULT_NODE_TYPES + :type type_mapping: Optional[dict[type, str]] + :return: The type of the schema node :rtype: str """ @@ -65,11 +71,10 @@ def parse_schema_from_file( ) -> DocumentNode: """Parse a schema from a file. - :param schema_file: The name of the schema file :type schema_ - file: - str + :param schema_file: The name of the schema file + :type schema_file: Union[str, Path] :param schema_directory_path: A path to a directory containing schemas - :type schema_directory_path: str + :type schema_directory_path: Optional[Union[str, Path]] :return: The parsed schema :rtype: DocumentNode :raises Exception: If the schema cannot be parsed @@ -183,12 +188,18 @@ def fill_empty_descriptions( with the new column or table value. Do not update descriptions that already have a value. Default values are provided for the new column and table descriptions. - :param node: The GraphQL node to update :type node: Node :param - new_column_value: The new column description value :type new_column_value: str - :param new_table_value: The new table description value :type new_table_value: - str :param use_value_name: Whether to use the value name in the description - :type use_value_name: bool :param value_name: The name of the value :type - value_name: Optional[str] :return: The updated node :rtype: Node + :param node: The GraphQL node to update + :type node: Node + :param new_column_value: The new column description value + :type new_column_value: str + :param new_table_value: The new table description value + :type new_table_value: str + :param use_value_name: Whether to use the value name in the description + :type use_value_name: bool + :param value_name: The name of the value + :type value_name: Optional[str] + :return: The updated node + :rtype: Node """ if hasattr(node, "description"): # and node.description == None: @@ -267,9 +278,12 @@ def schema_equality_check(gold_node: Node, check_node: Node) -> bool: """A method to check if two schema nodes are equal. 
Only checks that the schemas structures are equal, not the descriptions. - :param gold_node: The gold standard schema node :type gold_node: Node :param - check_node: The schema node to check :type check_node: Node :return: Whether the - schemas are equal :rtype: bool + :param gold_node: The gold standard schema node + :type gold_node: Node + :param check_node: The schema node to check + :type check_node: Node + :return: Whether the schemas are equal + :rtype: bool """ gold_node_copy = copy.deepcopy(gold_node) @@ -314,10 +328,13 @@ def parse_objects_from_full_schema_object( ) -> Union[dict[str, SchemaObject], None]: """Parse out all available tables from a full schema object. - :param schema: The full schema object to parse :type schema: SchemaObject :param - type_mapping: Custom mapping of node types to strings. Defaults to - DEFAULT_NODE_TYPES :type type_mapping: Optional[dict[type, str]] :return: The - parsed objects (tables and enums) :rtype: Union[dict, None] + :param schema: The full schema object to parse + :type schema: SchemaObject + :param type_mapping: Custom mapping of node types to strings. Defaults to + DEFAULT_NODE_TYPES + :type type_mapping: Optional[dict[type, str]] + :return: The parsed objects (tables and enums) + :rtype: Union[dict, None] """ if schema.schema_ast is None: diff --git a/graphdoc/graphdoc/data/schema.py b/graphdoc/graphdoc/data/schema.py index 2d49810..fefb343 100644 --- a/graphdoc/graphdoc/data/schema.py +++ b/graphdoc/graphdoc/data/schema.py @@ -21,6 +21,11 @@ class SchemaCategory(str, Enum): + """Schema quality categories enumeration. + + :no-index: + """ + PERFECT = "perfect" ALMOST_PERFECT = "almost perfect" POOR_BUT_CORRECT = "poor but correct" @@ -36,6 +41,11 @@ def from_str(cls, value: str) -> Optional["SchemaCategory"]: class SchemaRating(str, Enum): + """Schema quality ratings enumeration. 
+ + :no-index: + """ + FOUR = "4" THREE = "3" TWO = "2" @@ -53,7 +63,10 @@ def from_value(cls, value: Union[str, int]) -> Optional["SchemaRating"]: class SchemaCategoryRatingMapping: - """Maps SchemaCategory to SchemaRating.""" + """Mapping between schema categories and ratings. + + :no-index: + """ @staticmethod def get_rating(category: SchemaCategory) -> SchemaRating: @@ -91,6 +104,11 @@ def get_category(rating: SchemaRating) -> SchemaCategory: class SchemaType(str, Enum): + """Schema type enumeration. + + :no-index: + """ + FULL_SCHEMA = "full schema" TABLE_SCHEMA = "table schema" ENUM_SCHEMA = "enum schema" @@ -104,7 +122,10 @@ def from_str(cls, value: str) -> Optional["SchemaType"]: class SchemaCategoryPath(str, Enum): - """Maps schema categories to their folder names.""" + """Maps schema categories to their folder names. + + :no-index: + """ PERFECT = "perfect" ALMOST_PERFECT = "almost_perfect" @@ -134,6 +155,11 @@ def get_path( @dataclass class SchemaObject: + """Schema object containing schema data and metadata. + + :no-index: + """ + key: str category: Optional[Enum] = None rating: Optional[Enum] = None diff --git a/graphdoc/graphdoc/docs/conf.py b/graphdoc/graphdoc/docs/conf.py new file mode 100644 index 0000000..34e3df0 --- /dev/null +++ b/graphdoc/graphdoc/docs/conf.py @@ -0,0 +1,34 @@ +# -- Path setup -------------------------------------------------------------- +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. 
+import os +import sys +sys.path.insert(0, os.path.abspath('..')) + +# -- Extension configuration ------------------------------------------------- + +# Set the primary domain to Python to avoid duplicate object descriptions +primary_domain = 'py' + +# Add autodoc settings to prevent duplicate warnings +autodoc_default_options = { + 'members': True, + 'undoc-members': True, + 'show-inheritance': True, +} + +# Properly handle duplicates +intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} + +# Skip dspy.Signature classes which are causing warnings +def skip_dspy_signatures(app, what, name, obj, skip, options): + import inspect + if inspect.isclass(obj): + # Skip classes that inherit from dspy.Signature + for base in obj.__mro__: + if base.__name__ == 'Signature' and base.__module__.startswith('dspy'): + return True + return skip + +def setup(app): + app.connect('autodoc-skip-member', skip_dspy_signatures) \ No newline at end of file diff --git a/graphdoc/graphdoc/modules/doc_generator_module.py b/graphdoc/graphdoc/modules/doc_generator_module.py index 1b27f42..7a40442 100644 --- a/graphdoc/graphdoc/modules/doc_generator_module.py +++ b/graphdoc/graphdoc/modules/doc_generator_module.py @@ -30,6 +30,8 @@ def __init__( """Initialize the DocGeneratorModule. A module for generating documentation for a given GraphQL schema. Schemas are decomposed and individually used to generate documentation, with a quality check after each generation. + + :no-index: signature fields are: - database_schema: str = dspy.InputField() @@ -44,6 +46,8 @@ def __init__( :param rating_threshold: The minimum rating for a generated document to be considered valid. :type rating_threshold: int + :param fill_empty_descriptions: Whether to fill empty descriptions with generated documentation. + :type fill_empty_descriptions: bool """ super().__init__() @@ -107,8 +111,10 @@ def _retry_by_rating(self, database_schema: str) -> str: """Retry the generation if the quality check fails. 
Rating threshold is determined at initialization. - :param database_schema: The database schema to generate documentation for. :type - database_schema: str :return: The generated documentation. :rtype: str + :param database_schema: The database schema to generate documentation for. + :type database_schema: str + :return: The generated documentation. + :rtype: str """ @@ -239,9 +245,10 @@ def forward(self, database_schema: str) -> dspy.Prediction: """Given a database schema, generate a documented schema. If retry is True, the generation will be retried if the quality check fails. - :param database_schema: The database schema to generate documentation for. :type - database_schema: str :return: The generated documentation. :rtype: - dspy.Prediction + :param database_schema: The database schema to generate documentation for. + :type database_schema: str + :return: The generated documentation. + :rtype: dspy.Prediction """ if self.retry: @@ -261,9 +268,10 @@ def document_full_schema( """Given a database schema, parse out the underlying components and document on a per-component basis. - :param database_schema: The database schema to generate documentation for. :type - database_schema: str :return: The generated documentation. :rtype: - dspy.Prediction + :param database_schema: The database schema to generate documentation for. + :type database_schema: str + :return: The generated documentation. + :rtype: dspy.Prediction """ # if we are tracing, make sure make sure we have everything needed to log to mlflow diff --git a/graphdoc/graphdoc/prompts/__init__.py b/graphdoc/graphdoc/prompts/__init__.py index f4aa328..48738b9 100644 --- a/graphdoc/graphdoc/prompts/__init__.py +++ b/graphdoc/graphdoc/prompts/__init__.py @@ -38,13 +38,16 @@ def single_prompt( """Returns an instance of the specified prompt class. Allows for the user to pass in their own dspy signature. - :param prompt: The prompt to use. 
:type prompt: Union[str, dspy.Signature] - :param prompt_class: The class of the prompt to use. :type prompt_class: str - :param prompt_type: The type of the prompt to use. :type prompt_type: str :param - prompt_metric: The metric to use for the prompt. :type prompt_metric: Union[str, - DocQualityPrompt, SinglePrompt] :return: An instance of the specified prompt - class. :rtype: SinglePrompt - + :param prompt: The prompt to use. + :type prompt: Union[str, dspy.Signature] + :param prompt_class: The class of the prompt to use. + :type prompt_class: str + :param prompt_type: The type of the prompt to use. + :type prompt_type: str + :param prompt_metric: The metric to use for the prompt. + :type prompt_metric: Union[str, DocQualityPrompt, SinglePrompt] + :return: An instance of the specified prompt class. + :rtype: SinglePrompt """ prompt_classes = { "DocQualityPrompt": DocQualityPrompt, diff --git a/graphdoc/graphdoc/prompts/schema_doc_generation.py b/graphdoc/graphdoc/prompts/schema_doc_generation.py index 7d083f9..5c4b6f0 100644 --- a/graphdoc/graphdoc/prompts/schema_doc_generation.py +++ b/graphdoc/graphdoc/prompts/schema_doc_generation.py @@ -22,19 +22,10 @@ # DSPy Signatures # ################### class DocGeneratorSignature(dspy.Signature): + """A signature that takes a full GraphQL schema and returns a documented schema. + + :no-index: """ - ### TASK: Given a GraphQL Schema, generate a precise description for the columns of the tables in the database. - - ### Requirements: - - Focus solely on confirmed details from the provided schema. - - Keep the description concise and factual. - - Exclude any speculative or additional commentary. - - DO NOT return the phrase "in the { table } table" in your description. - - ### Formatting - - Ensure that the schema maintains proper documentation formatting, as is provided. 
- - """ # noqa: B950 database_schema: str = dspy.InputField() documented_schema: str = dspy.OutputField( @@ -43,21 +34,11 @@ class DocGeneratorSignature(dspy.Signature): class DocGeneratorHelperSignature(dspy.Signature): + """A signature that takes a GraphQL schema and returns the schema documented with + detailed yet concise descriptions for each field of its tables and enums. + + :no-index: """ - ### TASK: Analyze the provided GraphQL Schema and generate detailed yet concise descriptions for each field within the database tables and enums. - - ### Requirements: - - If the field is unclear, and the documentation result is ambiguous, request additional information: "WARNING: Please provide additional information to avoid confusion". - - Utilize only the verified information from the schema to ensure accuracy. - - Descriptions should be factual, straightforward, and avoid any speculative language. - - Refrain from using the phrase "in the { table } table" within your descriptions. - - Ensure that the documentation adheres to standard schema formatting without modifying the underlying schema structure. - - ### Formatting: - - Maintain consistency with the existing documentation style and structure. - - Focus on clarity and precision to aid developers and system architects in understanding the schema's components effectively. - - """ # noqa: B950 database_schema: str = dspy.InputField() documented_schema: str = dspy.OutputField( @@ -66,18 +47,11 @@ class DocGeneratorHelperSignature(dspy.Signature): class BadDocGeneratorSignature(dspy.Signature): + """A signature that takes a full GraphQL schema and returns the schema with + intentionally incorrect documentation. + + :no-index: """ - ### TASK: Given a GraphQL Schema, generate intentionally incorrect documentation for the columns of the tables in the database. - - ### Requirements: - - Every table, entity, enum, etc. must have at least one column with a description that is obviosly incorrect. 
- - The documentation must be incorrect and misleading. - - The documentation should be scattered, with only some columns having documentation. - - ### Formatting - - Ensure that the schema maintains proper documentation formatting, as is provided. - - """ # noqa: B950 database_schema: str = dspy.InputField() documented_schema: str = dspy.OutputField( @@ -115,6 +89,11 @@ def doc_gen_factory( # Single Prompt Class # ####################### class DocGeneratorPrompt(SinglePrompt): + """DocGeneratorPrompt class for generating documentation for GraphQL schemas. + + :no-index: + """ + def __init__( self, prompt: Union[str, dspy.Signature, dspy.SignatureMeta], @@ -190,13 +169,13 @@ def format_metric( ) -> Dict[str, Any]: """Format the metric results into a dictionary. - :param examples: The examples used to evaluate the metric. - :type examples: List[dspy.Example] - :param overall_score: The overall score of the metric. - :type overall_score: float - :param results: The results of the metric. - :type results: List - :param scores: The scores of the metric. + :param examples: The examples used to evaluate the metric. + :type examples: List[dspy.Example] + :param overall_score: The overall score of the metric. + :type overall_score: float + :param results: The results of the metric. + :type results: List + :param scores: The scores of the metric. :type scores: List """ @@ -217,8 +196,8 @@ def compare_metrics( :param base_metrics: The base metrics. :type base_metrics: Any - :param optimized_metrics: The optimized metrics. - :type + :param optimized_metrics: The optimized metrics. 
:type + """ if comparison_value == "overall_score": return optimized_metrics.get("overall_score", 0) > base_metrics.get( diff --git a/graphdoc/graphdoc/prompts/schema_doc_quality.py b/graphdoc/graphdoc/prompts/schema_doc_quality.py index a975196..653f34a 100644 --- a/graphdoc/graphdoc/prompts/schema_doc_quality.py +++ b/graphdoc/graphdoc/prompts/schema_doc_quality.py @@ -20,7 +20,10 @@ # DSPy Signatures # ################### class DocQualitySignature(dspy.Signature): - """ + """A signature for evaluating the quality of GraphQL schema documentation. + + :no-index: + You are evaluating the output of an LLM program, expect hallucinations. Given a GraphQL Schema, evaluate the quality of documentation for that schema and provide a category rating. The categories are described as: @@ -40,37 +43,10 @@ class DocQualitySignature(dspy.Signature): class DocQualityDemonstrationSignature(dspy.Signature): + """A signature for demonstrating good, average and bad GraphQL schema documentation quality examples. + + :no-index: """ - You are evaluating the output of an LLM program, expect hallucinations. Given a GraphQL Schema, evaluate the quality of documentation for that schema and provide a category rating. - - The categories are described as: - - perfect (4): The documentation contains enough information so that the interpretation of the schema and its database content is completely free of ambiguity. - perfect (4) example: - type Domain @entity { - " The namehash (id) of the parent name. References the Domain entity that is the parent of the current domain. Type: Domain " - parent: Domain - } - - almost perfect (3): The documentation is almost perfect and free from ambiguity, but there is room for improvement. - almost perfect (3) example: - type Token @entity { - " Name of the token, mirrored from the smart contract " - name: String! - } - - poor but correct (2): The documentation is poor but correct and has room for improvement due to missing information. 
The documentation is not incorrect. - poor but correct (2) example: - type InterestRate @entity { - "Description for column: id" - id: ID! - } - - incorrect (1): The documentation is incorrect and contains inaccurate or misleading information. Any incorrect information automatically leads to an incorrect rating, even if some correct information is present. - incorrect (1) example: - type BridgeProtocol implements Protocol @entity { - " Social Security Number of the protocol's main developer " - id: Bytes! - } - Output a number rating that corresponds to the categories described above. - - """ # noqa: B950 database_schema: str = dspy.InputField() category: Literal["perfect", "almost perfect", "poor but correct", "incorrect"] = ( @@ -107,6 +83,15 @@ def doc_quality_factory( # Single Prompt Class # ####################### class DocQualityPrompt(SinglePrompt): + """DocQualityPrompt class for evaluating documentation quality. + + :no-index: + + This is a single prompt that can be used to evaluate the quality of the documentation + for a given schema. This is a wrapper around the SinglePrompt class that implements + the abstract methods. + """ + def __init__( self, prompt: Union[ @@ -120,20 +105,20 @@ def __init__( prompt_metric: Union[Literal["rating", "category"], Callable] = "rating", ) -> None: # TODO: we should think about if we want to add checks on any provided dspy.Signature - """Initialize the DocQualityPrompt. This is a single prompt that can be used to - evaluate the quality of the documentation for a given schema. This is a wrapper - around the SinglePrompt class that implements the abstract methods. + """Initialize the DocQualityPrompt. :param prompt: The prompt to use. Can either be a string that maps to a defined - signature, as set in the doc_quality_factory, or a dspy.Signature. :type prompt: - Union[str, dspy.Signature] :param prompt_type: The type of prompt to use. 
:type - prompt_type: Union[Literal["predict", "chain_of_thought"], Callable] :param - prompt_metric: The metric to use. Can either be a string that maps to a defined - metric, as set in the doc_quality_factory, or a custom callable function. - Function must have the signature (example: dspy.Example, prediction: - dspy.Prediction) -> bool. :type prompt_metric: Union[Literal["rating", - "category"], Callable] + signature, as set in the doc_quality_factory, or a dspy.Signature. + :type prompt: Union[str, dspy.Signature] + + :param prompt_type: The type of prompt to use. + :type prompt_type: Union[Literal["predict", "chain_of_thought"], Callable] + :param prompt_metric: The metric to use. Can either be a string that maps to a defined + metric, as set in the doc_quality_factory, or a custom callable function. + Function must have the signature (example: dspy.Example, prediction: + dspy.Prediction) -> bool. + :type prompt_metric: Union[Literal["rating", "category"], Callable] """ prompt_signature = doc_quality_factory(prompt) super().__init__( @@ -163,12 +148,14 @@ def evaluate_metric( ) -> bool: """Evaluate the metric for the given example and prediction. - :param example: The example to evaluate the metric on. :type example: - dspy.Example :param prediction: The prediction to evaluate the metric on. :type - prediction: dspy.Prediction :param trace: Used for DSPy. :type trace: Any + :param example: The example to evaluate the metric on. + :type example: dspy.Example + :param prediction: The prediction to evaluate the metric on. + :type prediction: dspy.Prediction + :param trace: Used for DSPy. + :type trace: Any :return: The result of the evaluation. A boolean for if the metric is correct. :rtype: bool - """ evaluation_mapping = { "rating": self._evaluate_rating_metric, @@ -280,10 +267,14 @@ def compare_metrics( """Compare the metrics of the base and optimized models. Returns true if the optimized model is better than the base model. 
- :param base_metrics: The metrics of the base model. :type base_metrics: Any - :param optimized_metrics: The metrics of the optimized model. :type - optimized_metrics: Any :param comparison_value: The value to compare. - + :param base_metrics: The metrics of the base model. + :type base_metrics: Any + :param optimized_metrics: The metrics of the optimized model. + :type optimized_metrics: Any + :param comparison_value: The value to compare. + :type comparison_value: str + :return: True if the optimized model is better than the base model. + :rtype: bool """ if comparison_value == "overall_score": return optimized_metrics["overall_score"] > base_metrics["overall_score"] diff --git a/graphdoc/graphdoc/prompts/single_prompt.py b/graphdoc/graphdoc/prompts/single_prompt.py index c01f480..32be6c4 100644 --- a/graphdoc/graphdoc/prompts/single_prompt.py +++ b/graphdoc/graphdoc/prompts/single_prompt.py @@ -28,6 +28,8 @@ def __init__( # and tighter coupling ) -> None: """Initialize a single prompt. + + :no-index: :param prompt: The prompt to use. :type prompt: dspy.Signature @@ -70,10 +72,12 @@ def evaluate_metric( ) -> Any: """This is the metric used to evalaute the prompt. - :param example: The example to evaluate the metric on. :type example: - dspy.Example :param prediction: The prediction to evaluate the metric on. :type - prediction: dspy.Prediction :param trace: The trace to evaluate the metric on. - This is for DSPy. :type trace: Any + :param example: The example to evaluate the metric on. + :type example: dspy.Example + :param prediction: The prediction to evaluate the metric on. + :type prediction: dspy.Prediction + :param trace: The trace to evaluate the metric on. This is for DSPy. + :type trace: Any """ pass @@ -89,11 +93,14 @@ def format_metric( """This takes the results from the evaluate_evalset and does any necessary formatting, taking into account the metric type. - :param examples: The examples to evaluate the metric on. 
:type examples: - List[dspy.Example] :param overall_score: The overall score of the metric. :type - overall_score: float :param results: The results from the evaluate_evalset. - :type results: List :param scores: The scores from the evaluate_evalset. :type - scores: List + :param examples: The examples to evaluate the metric on. + :type examples: List[dspy.Example] + :param overall_score: The overall score of the metric. + :type overall_score: float + :param results: The results from the evaluate_evalset. + :type results: List + :param scores: The scores from the evaluate_evalset. + :type scores: List """ pass @@ -108,12 +115,14 @@ def compare_metrics( """Compare the metrics of the base and optimized models. Return true if the optimized model is better than the base model. - :param base_metrics: The metrics of the base model. :type base_metrics: Any - :param optimized_metrics: The metrics of the optimized model. :type - optimized_metrics: Any :param comparison_value: The value to compare the metrics - on. Determines which metric is used to compare the models. :type - comparison_value: str :return: True if the optimized model is better than the - base model, False otherwise. :rtype: bool + :param base_metrics: The metrics of the base model. + :type base_metrics: Any + :param optimized_metrics: The metrics of the optimized model. + :type optimized_metrics: Any + :param comparison_value: The value to compare the metrics on. Determines which metric is used to compare the models. + :type comparison_value: str + :return: True if the optimized model is better than the base model, False otherwise. + :rtype: bool """ pass @@ -127,13 +136,16 @@ def evaluate_evalset( ) -> Dict[str, Any]: """Take in a list of examples and evaluate the results. - :param examples: The examples to evaluate the results on. :type examples: - List[dspy.Example] :param num_threads: The number of threads to use for - evaluation. 
:type num_threads: int :param display_progress: Whether to display - the progress of the evaluation. :type display_progress: bool :param - display_table: Whether to display the table of the evaluation. :type - display_table: bool :return: A dictionary containing the overall score, results, - and scores. :rtype: Dict[str, Any] + :param examples: The examples to evaluate the results on. + :type examples: List[dspy.Example] + :param num_threads: The number of threads to use for evaluation. + :type num_threads: int + :param display_progress: Whether to display the progress of the evaluation. + :type display_progress: bool + :param display_table: Whether to display the table of the evaluation. + :type display_table: bool + :return: A dictionary containing the overall score, results, and scores. + :rtype: Dict[str, Any] """ evaluator = dspy.Evaluate( diff --git a/graphdoc/graphdoc/run.sh b/graphdoc/graphdoc/run.sh new file mode 100644 index 0000000..044b956 --- /dev/null +++ b/graphdoc/graphdoc/run.sh @@ -0,0 +1,164 @@ +#!/bin/bash + +# development and installation commands +python_command() { + poetry run python +} + +shell_command() { + poetry shell +} + +install_command() { + poetry install --without dev +} + +dev_command() { + poetry install --with dev +} + +requirements_command() { + poetry export -f requirements.txt --without-hashes --with dev,docs --output requirements.txt +} + +format_command() { + poetry run black . +} + +docstring_format_command() { + poetry run docformatter --black --style sphinx --in-place --exclude="prompts" --recursive graphdoc/ + poetry run docformatter --black --style sphinx --in-place --recursive runners/ + poetry run docformatter --black --style sphinx --in-place --recursive tests/ +} + +pep8_check_command() { + poetry run flake8 graphdoc/ + poetry run flake8 runners/ + poetry run flake8 tests/ +} + +sort_command() { + poetry run isort . +} + +lint_command() { + poetry run pyright . 
+} + +test_command() { + poetry run pytest --testmon -p no:warnings +} + +commit_command() { + format_command + docstring_format_command + sort_command + lint_command + pep8_check_command + test_command + requirements_command +} + +# Documentation commands +docs() { + echo "Generating RST files..." + cd docs && python generate_docs.py + + echo "Building documentation..." + cd docs && make clean html + echo "Documentation built in docs/build/html" +} + +docs_init() { + echo "Initializing Sphinx documentation..." + # Remove existing docs directory if it exists + rm -rf docs + # Create fresh docs directory + mkdir -p docs + cd docs + sphinx-quickstart -q \ + -p GraphDoc \ + -a "Semiotic Labs" \ + -v 1.0 \ + -r 1.0 \ + -l en \ + --ext-autodoc \ + --ext-viewcode \ + --makefile \ + --batchfile + # Create necessary directories + mkdir -p source/_static source/_templates + echo "Sphinx documentation initialized" +} + +# train commands +doc_quality_train_command() { + poetry run python runners/train/single_prompt_trainer.py --config-path assets/configs/single_prompt_doc_quality_trainer.yaml +} + +doc_generator_train_command() { + poetry run python runners/train/single_prompt_trainer.py --config-path assets/configs/single_prompt_doc_generator_trainer.yaml +} + +# eval commands +doc_generator_eval_command() { + poetry run python runners/eval/eval_doc_generator_module.py --config-path assets/configs/single_prompt_doc_generator_module_eval.yaml +} + +# help menu +show_help() { + echo "Usage: ./nli [option]" + echo "Options:" + + # development and installation commands + echo " python Run Python" + echo " shell Run shell" + echo " install Install dependencies" + echo " dev Install dependencies with dev" + echo " requirements Generate requirements.txt" + echo " format Format the code" + echo " docstring-format Format the docstrings" + echo " pep-check Check the PEP8 compliance" + echo " lint Lint the code" + echo " test Run the tests" + echo " commit Format, lint, and test the 
code" + echo " docs Build the documentation" + echo " docs-init Initialize the Sphinx documentation" + + # train commands + echo " doc-quality-train Train a document quality model" + echo " doc-generator-train Train a document generator model" + + # eval commands + echo " doc-generator-eval Evaluate a document generator model" +} + +# handle command line arguments +if [ -z "$1" ]; then + show_help +else + case "$1" in + + # development and installation commands + "python") python_command ;; + "shell") shell_command ;; + "install") install_command ;; + "dev") dev_command ;; + "requirements") requirements_command ;; + "format") format_command ;; + "docstring-format") docstring_format_command ;; + "pep-check") pep8_check_command ;; + "lint") lint_command ;; + "test") test_command ;; + "commit") commit_command ;; + "docs") docs ;; + "docs-init") docs_init ;; + "doc-quality-train") doc_quality_train_command ;; + "doc-generator-train") doc_generator_train_command ;; + "doc-generator-eval") doc_generator_eval_command ;; + *) + echo "Usage: $0 {test|lint|format|docs|docs-init|doc-quality-train|doc-generator-train|doc-generator-eval}" + exit 1 + ;; + esac +fi \ No newline at end of file diff --git a/graphdoc/graphdoc/train/doc_generator_trainer.py b/graphdoc/graphdoc/train/doc_generator_trainer.py index 60c0369..2fd4ecd 100644 --- a/graphdoc/graphdoc/train/doc_generator_trainer.py +++ b/graphdoc/graphdoc/train/doc_generator_trainer.py @@ -35,16 +35,22 @@ def __init__( ): """Initialize the DocGeneratorTrainer. - :param prompt: The prompt to train. :type prompt: DocGeneratorPrompt :param - optimizer_type: The type of optimizer to use. :type optimizer_type: str :param - optimizer_kwargs: The keyword arguments for the optimizer. :type - optimizer_kwargs: Dict[str, Any] :param mlflow_model_name: The name of the model - in mlflow. :type mlflow_model_name: str :param mlflow_experiment_name: The name - of the experiment in mlflow. 
:type mlflow_experiment_name: str :param - mlflow_tracking_uri: The uri of the mlflow tracking server. :type - mlflow_tracking_uri: str :param trainset: The training set. :type trainset: - List[dspy.Example] :param evalset: The evaluation set. :type evalset: - List[dspy.Example] + :param prompt: The prompt to train. + :type prompt: DocGeneratorPrompt + :param optimizer_type: The type of optimizer to use. + :type optimizer_type: str + :param optimizer_kwargs: The keyword arguments for the optimizer. + :type optimizer_kwargs: Dict[str, Any] + :param mlflow_model_name: The name of the model in mlflow. + :type mlflow_model_name: str + :param mlflow_experiment_name: The name of the experiment in mlflow. + :type mlflow_experiment_name: str + :param mlflow_tracking_uri: The uri of the mlflow tracking server. + :type mlflow_tracking_uri: str + :param trainset: The training set. + :type trainset: List[dspy.Example] + :param evalset: The evaluation set. + :type evalset: List[dspy.Example] """ super().__init__( @@ -83,9 +89,10 @@ def evaluation_metrics( ) -> None: """Log evaluation metrics to mlflow. - :param base_evaluation: The evaluation metrics of the base model. :type - base_evaluation: Dict[str, Any] :param optimized_evaluation: The evaluation - metrics of the optimized model. :type optimized_evaluation: Dict[str, Any] + :param base_evaluation: The evaluation metrics of the base model. + :type base_evaluation: Dict[str, Any] + :param optimized_evaluation: The evaluation metrics of the optimized model. + :type optimized_evaluation: Dict[str, Any] """ base_evaluation_overall_score = self._calculate_average_score(base_evaluation) @@ -140,11 +147,12 @@ def evaluate_training( def train( self, load_model_args: Optional[Dict[str, Any]] = None, save_model: bool = True ): - """Train the document generator model. + """Train the model. If load_model_args is provided, load the model from MLFlow. - :param load_model_args: The arguments to load the model. 
:type load_model_args: - Optional[Dict[str, Any]] :param save_model: Whether to save the model. :type - save_model: bool :return: The trained model. :rtype: dspy.ChainOfThought + :param load_model_args: The arguments to load the model from mlflow. + :type load_model_args: Optional[Dict[str, Any]] + :param save_model: Whether to save the model to mlflow. + :type save_model: bool """ # if model args are provided, load the model from mlflow diff --git a/graphdoc/graphdoc/train/doc_quality_trainer.py b/graphdoc/graphdoc/train/doc_quality_trainer.py index 442c9fc..ebbbec5 100644 --- a/graphdoc/graphdoc/train/doc_quality_trainer.py +++ b/graphdoc/graphdoc/train/doc_quality_trainer.py @@ -36,14 +36,22 @@ def __init__( """Initialize the DocQualityTrainer. This is the base class for implementing a trainer for a DocQualityPrompt. - :param prompt: The prompt to train. :type prompt: DocQualityPrompt :param - optimizer_type: The type of optimizer to use. :type optimizer_type: str :param - optimizer_kwargs: The keyword arguments for the optimizer. :type - optimizer_kwargs: Dict[str, Any] :param mlflow_model_name: The name of the model - in mlflow. :type mlflow_model_name: str :param mlflow_experiment_name: The name - of the experiment in mlflow. :type mlflow_experiment_name: str :param - mlflow_tracking_uri: The uri of the mlflow tracking server. :type - mlflow_tracking_uri: str :param trainset: The training set. + :param prompt: The prompt to train. + :type prompt: DocQualityPrompt + :param optimizer_type: The type of optimizer to use. + :type optimizer_type: str + :param optimizer_kwargs: The keyword arguments for the optimizer. + :type optimizer_kwargs: Dict[str, Any] + :param mlflow_model_name: The name of the model in mlflow. + :type mlflow_model_name: str + :param mlflow_experiment_name: The name of the experiment in mlflow. + :type mlflow_experiment_name: str + :param mlflow_tracking_uri: The uri of the mlflow tracking server. 
+ :type mlflow_tracking_uri: str + :param trainset: The training set. + :type trainset: List[dspy.Example] + :param evalset: The evaluation set. + :type evalset: List[dspy.Example] """ super().__init__( @@ -64,10 +72,10 @@ def evaluation_metrics(self, base_evaluation, optimized_evaluation): """Log evaluation metrics to mlflow. We will log the overall scores and the per category scores. Per category scores will be logged as a csv file. - :param base_evaluation: The evaluation metrics of the base model. :type - base_evaluation: Any :param optimized_evaluation: The evaluation metrics of the - optimized model. :type optimized_evaluation: Any - + :param base_evaluation: The evaluation metrics of the base model. + :type base_evaluation: Any + :param optimized_evaluation: The evaluation metrics of the optimized model. + :type optimized_evaluation: Any """ base_evaluation_overall_score = base_evaluation["overall_score"] optimized_evaluation_overall_score = optimized_evaluation["overall_score"] diff --git a/graphdoc/graphdoc/train/single_prompt_trainer.py b/graphdoc/graphdoc/train/single_prompt_trainer.py index 2096400..7c6ec4d 100644 --- a/graphdoc/graphdoc/train/single_prompt_trainer.py +++ b/graphdoc/graphdoc/train/single_prompt_trainer.py @@ -33,16 +33,22 @@ def __init__( """Initialize the SinglePromptTrainer. This is the base class for implementing a trainer for a single prompt. - :param prompt: The prompt to train. :type prompt: SinglePrompt :param - optimizer_type: The type of optimizer to use. :type optimizer_type: str :param - optimizer_kwargs: The keyword arguments for the optimizer. :type - optimizer_kwargs: Dict[str, Any] :param mlflow_model_name: The name of the model - in mlflow. :type mlflow_model_name: str :param mlflow_experiment_name: The name - of the experiment in mlflow. :type mlflow_experiment_name: str :param - mlflow_tracking_uri: The uri of the mlflow tracking server. :type - mlflow_tracking_uri: str :param trainset: The training set. 
:type trainset: - List[dspy.Example] - + :param prompt: The prompt to train. + :type prompt: SinglePrompt + :param optimizer_type: The type of optimizer to use. + :type optimizer_type: str + :param optimizer_kwargs: The keyword arguments for the optimizer. + :type optimizer_kwargs: Dict[str, Any] + :param mlflow_model_name: The name of the model in mlflow. + :type mlflow_model_name: str + :param mlflow_experiment_name: The name of the experiment in mlflow. + :type mlflow_experiment_name: str + :param mlflow_tracking_uri: The uri of the mlflow tracking server. + :type mlflow_tracking_uri: str + :param trainset: The training set. + :type trainset: List[dspy.Example] + :param evalset: The evaluation set. + :type evalset: List[dspy.Example] """ self.prompt = prompt self.optimizer_type = optimizer_type @@ -73,15 +79,14 @@ def __init__( # Abstract Methods # #################### - # TODO: decide on a return type and implement better type checking for parameters @abstractmethod def evaluation_metrics(self, base_evaluation, optimized_evaluation): """Log evaluation metrics to mlflow. - :param base_evaluation: The evaluation metrics of the base model. :type - base_evaluation: Any :param optimized_evaluation: The evaluation metrics of the - optimized model. :type optimized_evaluation: Any - + :param base_evaluation: The evaluation metrics of the base model. + :type base_evaluation: Any + :param optimized_evaluation: The evaluation metrics of the optimized model. + :type optimized_evaluation: Any """ pass diff --git a/graphdoc/run.sh b/graphdoc/run.sh index 112f52a..73cc0d0 100755 --- a/graphdoc/run.sh +++ b/graphdoc/run.sh @@ -60,10 +60,16 @@ commit_command() { } # Documentation commands +docs_generate() { + echo "Generating RST files..." + cd docs && python generate_docs.py + echo "RST files generated successfully!" +} + docs() { echo "Building documentation..." 
cd docs && make clean html - echo "Documentation built in docs/build/html" + echo "Documentation built in docs/_build/html" } docs_init() { @@ -119,6 +125,7 @@ show_help() { echo " lint Lint the code" echo " test Run the tests" echo " commit Format, lint, and test the code" + echo " docs-generate Generate documentation RST files" echo " docs Build the documentation" echo " docs-init Initialize the Sphinx documentation" @@ -148,6 +155,7 @@ else "lint") lint_command ;; "test") test_command ;; "commit") commit_command ;; + "docs-generate") docs_generate ;; "docs") docs ;; "docs-init") docs_init ;; "doc-quality-train") doc_quality_train_command ;; diff --git a/run.sh b/run.sh index 8b61d35..f860c2d 100755 --- a/run.sh +++ b/run.sh @@ -42,6 +42,19 @@ mlflow_teardown() { cd .. } +# Documentation commands +docs_generate() { + echo "Generating documentation RST files..." + cd graphdoc && ./run.sh docs-generate + cd .. +} + +docs_build() { + echo "Building documentation..." + cd graphdoc && ./run.sh docs + cd .. +} + # train commands doc_quality_train() { echo "Training a document quality model..." 
@@ -75,6 +88,10 @@ show_help() { # make commands echo " mlflow-setup Install mlflow-manager dependencies and run the services" echo " mlflow-teardown Teardown mlflow-manager services" + + # documentation commands + echo " docs-generate Generate documentation RST files automatically" + echo " docs-build Build documentation HTML files" # train commands echo " doc-quality-train Train a document quality model" @@ -96,6 +113,10 @@ else # make commands "mlflow-setup") mlflow_setup ;; "mlflow-teardown") mlflow_teardown ;; + + # documentation commands + "docs-generate") docs_generate ;; + "docs-build") docs_build ;; # train commands "doc-quality-train") doc_quality_train ;; From 6023bc55d0f39b8d9b6278d30905809c2720d4d5 Mon Sep 17 00:00:00 2001 From: denver Date: Wed, 12 Mar 2025 13:35:55 -0500 Subject: [PATCH 02/13] docs: update docstrings and sphinx build process --- graphdoc/docs/conf.py | 28 ++++--- graphdoc/docs/generate_docs.py | 82 +++++++++++-------- graphdoc/graphdoc/config.py | 38 +++++++-- .../data/dspy_data/dspy_data_helper.py | 3 +- .../data/dspy_data/generation_data_helper.py | 2 +- .../data/dspy_data/quality_data_helper.py | 2 +- graphdoc/graphdoc/data/local.py | 2 +- .../data/mlflow_data/mlflow_data_helper.py | 2 +- graphdoc/graphdoc/data/parser.py | 3 +- graphdoc/graphdoc/data/schema.py | 26 +++--- graphdoc/graphdoc/docs/conf.py | 20 +++-- .../graphdoc/modules/doc_generator_module.py | 5 +- .../graphdoc/prompts/schema_doc_generation.py | 8 +- .../graphdoc/prompts/schema_doc_quality.py | 24 +++--- graphdoc/graphdoc/prompts/single_prompt.py | 5 +- .../graphdoc/train/doc_quality_trainer.py | 1 + .../graphdoc/train/single_prompt_trainer.py | 2 + 17 files changed, 153 insertions(+), 100 deletions(-) diff --git a/graphdoc/docs/conf.py b/graphdoc/docs/conf.py index 9da3d41..57a2551 100644 --- a/graphdoc/docs/conf.py +++ b/graphdoc/docs/conf.py @@ -52,52 +52,58 @@ # add these directories to sys.path here. 
import os import sys -sys.path.insert(0, os.path.abspath('..')) + +sys.path.insert(0, os.path.abspath("..")) # -- Suppress specific warnings ----------------------------------------------- # This suppresses specific warning types that we want to ignore suppress_warnings = [ - 'autodoc.duplicate_object_description', + "autodoc.duplicate_object_description", ] # Configure autodoc settings to handle duplicate signatures autodoc_default_options = { - 'members': True, - 'undoc-members': True, - 'show-inheritance': True, - 'member-order': 'bysource', + "members": True, + "undoc-members": True, + "show-inheritance": True, + "member-order": "bysource", } # Configure intersphinx mapping for external projects intersphinx_mapping = { - 'python': ('https://docs.python.org/3', None), + "python": ("https://docs.python.org/3", None), } # Ensure that objects are documented only once at their canonical location canonical_module_mapping = {} + def get_canonical_path(obj_module): """Determines the canonical path for a module.""" if obj_module in canonical_module_mapping: return canonical_module_mapping[obj_module] return obj_module + def process_docstring(app, what, name, obj, options, lines): """Process docstrings to add any needed directives or modify content.""" # Add any processing here if needed pass + def process_signature(app, what, name, obj, options, signature, return_annotation): """Process signatures to standardize them across the codebase.""" # Add any processing here if needed return (signature, return_annotation) + # Configure nitpicky mode to be less strict nitpicky = False + def setup(app): - app.connect('autodoc-process-docstring', process_docstring) - app.connect('autodoc-process-signature', process_signature) + app.connect("autodoc-process-docstring", process_docstring) + app.connect("autodoc-process-signature", process_signature) # Create static directory if it doesn't exist to avoid the warning - if not os.path.exists(os.path.join(os.path.dirname(__file__), 
'_static')): - os.makedirs(os.path.join(os.path.dirname(__file__), '_static')) + if not os.path.exists(os.path.join(os.path.dirname(__file__), "_static")): + os.makedirs(os.path.join(os.path.dirname(__file__), "_static")) diff --git a/graphdoc/docs/generate_docs.py b/graphdoc/docs/generate_docs.py index 43ef125..0d955c6 100755 --- a/graphdoc/docs/generate_docs.py +++ b/graphdoc/docs/generate_docs.py @@ -4,72 +4,80 @@ Run this script before building the documentation to ensure all RST files are up-to-date. """ import os -import subprocess import shutil +import subprocess import sys + def main(): # Get the directory where this script is located docs_dir = os.path.dirname(os.path.abspath(__file__)) - + # The path to the module we want to document - module_dir = os.path.abspath(os.path.join(docs_dir, '..')) - + module_dir = os.path.abspath(os.path.join(docs_dir, "..")) + # Where to output the rst files output_dir = docs_dir - + # Clean up existing RST files except for special ones - preserve_files = ['index.rst', 'conf.py', 'generate_docs.py'] + preserve_files = ["index.rst", "conf.py", "generate_docs.py"] for filename in os.listdir(output_dir): filepath = os.path.join(output_dir, filename) - if (filename.endswith('.rst') and filename not in preserve_files and - os.path.isfile(filepath)): + if ( + filename.endswith(".rst") + and filename not in preserve_files + and os.path.isfile(filepath) + ): print(f"Removing {filepath}") os.unlink(filepath) - + # Run sphinx-apidoc - subprocess.run([ - 'sphinx-apidoc', - '-f', # Force overwriting of existing files - '-e', # Put module documentation before submodule documentation - '-M', # Put module documentation before member documentation - '-o', output_dir, # Output directory - module_dir, # Module directory - 'setup.py', # Exclude these files/patterns - '*tests*', - '*venv*', - '*docs*' - ]) - + subprocess.run( + [ + "sphinx-apidoc", + "-f", # Force overwriting of existing files + "-e", # Put module documentation before submodule 
documentation + "-M", # Put module documentation before member documentation + "-o", + output_dir, # Output directory + module_dir, # Module directory + "setup.py", # Exclude these files/patterns + "*tests*", + "*venv*", + "*docs*", + ] + ) + # Add custom content to the module RST files customize_rst_files(output_dir) - + print("\nRST files have been generated successfully!") print("You can now build the documentation with: cd docs && make html") + def customize_rst_files(output_dir): """Add custom content to the RST files.""" # Example: Add a note about auto-generation to each RST file for filename in os.listdir(output_dir): - if filename.endswith('.rst') and filename != 'index.rst': + if filename.endswith(".rst") and filename != "index.rst": filepath = os.path.join(output_dir, filename) - with open(filepath, 'r') as f: + with open(filepath, "r") as f: content = f.read() - + # Add noindex to submodules to prevent duplicates content = content.replace( - ":show-inheritance:", - ":show-inheritance:\n :noindex:" + ":show-inheritance:", ":show-inheritance:\n :noindex:" ) - - with open(filepath, 'w') as f: + + with open(filepath, "w") as f: f.write(content) - + # Create or update index.rst if it doesn't exist - index_path = os.path.join(output_dir, 'index.rst') + index_path = os.path.join(output_dir, "index.rst") if not os.path.exists(index_path): - with open(index_path, 'w') as f: - f.write(""".. GraphDoc documentation master file + with open(index_path, "w") as f: + f.write( + """.. 
GraphDoc documentation master file Welcome to GraphDoc's documentation ================================== @@ -86,7 +94,9 @@ def customize_rst_files(output_dir): * :ref:`genindex` * :ref:`modindex` * :ref:`search` -""") +""" + ) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/graphdoc/graphdoc/config.py b/graphdoc/graphdoc/config.py index 07d055b..7cf9dcb 100644 --- a/graphdoc/graphdoc/config.py +++ b/graphdoc/graphdoc/config.py @@ -76,7 +76,7 @@ def mlflow_data_helper_from_yaml(yaml_path: Union[str, Path]) -> MlflowDataHelpe mlflow_tracking_username: !env MLFLOW_TRACKING_USERNAME # The username for the mlflow tracking server mlflow_tracking_password: !env MLFLOW_TRACKING_PASSWORD # The password for the mlflow tracking server - """ + """ # noqa: B950 config = load_yaml_config(yaml_path) return mlflow_data_helper_from_dict( config["mlflow"], @@ -621,14 +621,34 @@ def doc_generator_eval_from_yaml(yaml_path: Union[str, Path]) -> DocGeneratorEva :return: A DocGeneratorEvaluator object. 
:rtype: DocGeneratorEvaluator - """ + """ # noqa: B950 + # load the generator + generator = doc_generator_module_from_yaml(yaml_path) config = load_yaml_config(yaml_path) - module = doc_generator_module_from_yaml(yaml_path) + + # load the evaluator + metric_config = config["prompt_metric"] + evaluator = single_prompt_from_dict(metric_config, metric_config["metric"]) + + # load the eval config + mdh = mlflow_data_helper_from_yaml(yaml_path) # noqa: F841 + mlflow_tracking_uri = config["eval"]["mlflow_tracking_uri"] + mlflow_experiment_name = config["eval"]["mlflow_experiment_name"] + generator_prediction_field = config["eval"]["generator_prediction_field"] + evaluator_prediction_field = config["eval"]["evaluator_prediction_field"] + readable_value = config["eval"]["readable_value"] + + # load the evalset + evalset = trainset_from_yaml(yaml_path) + + # return the evaluator return DocGeneratorEvaluator( - tracking_uri=config["eval"]["mlflow_tracking_uri"], - experiment_name=config["eval"]["mlflow_experiment_name"], - module=module, - generator_prediction_field=config["eval"]["generator_prediction_field"], - evaluator_prediction_field=config["eval"]["evaluator_prediction_field"], - readable_value=config["eval"]["readable_value"], + generator=generator, + evaluator=evaluator, + evalset=evalset, + mlflow_tracking_uri=mlflow_tracking_uri, + mlflow_experiment_name=mlflow_experiment_name, + generator_prediction_field=generator_prediction_field, + evaluator_prediction_field=evaluator_prediction_field, + readable_value=readable_value, ) diff --git a/graphdoc/graphdoc/data/dspy_data/dspy_data_helper.py b/graphdoc/graphdoc/data/dspy_data/dspy_data_helper.py index 874141d..9bc908f 100644 --- a/graphdoc/graphdoc/data/dspy_data/dspy_data_helper.py +++ b/graphdoc/graphdoc/data/dspy_data/dspy_data_helper.py @@ -21,8 +21,9 @@ class DspyDataHelper(ABC): """Abstract class for creating data objects related to a given dspy.Signature. 
- + :no-index: + """ ####################### diff --git a/graphdoc/graphdoc/data/dspy_data/generation_data_helper.py b/graphdoc/graphdoc/data/dspy_data/generation_data_helper.py index 482147d..694e928 100644 --- a/graphdoc/graphdoc/data/dspy_data/generation_data_helper.py +++ b/graphdoc/graphdoc/data/dspy_data/generation_data_helper.py @@ -20,7 +20,7 @@ class GenerationDataHelper(DspyDataHelper): """A helper class for creating data objects related to our Documentation Generation dspy.Signature. - + :no-index: The example signature is defined as: diff --git a/graphdoc/graphdoc/data/dspy_data/quality_data_helper.py b/graphdoc/graphdoc/data/dspy_data/quality_data_helper.py index 78c0273..db1d28b 100644 --- a/graphdoc/graphdoc/data/dspy_data/quality_data_helper.py +++ b/graphdoc/graphdoc/data/dspy_data/quality_data_helper.py @@ -20,7 +20,7 @@ class QualityDataHelper(DspyDataHelper): """A helper class for creating data objects related to our Documentation Quality dspy.Signature. - + :no-index: The example signature is defined as: diff --git a/graphdoc/graphdoc/data/local.py b/graphdoc/graphdoc/data/local.py index d5209d2..da48fc4 100644 --- a/graphdoc/graphdoc/data/local.py +++ b/graphdoc/graphdoc/data/local.py @@ -30,7 +30,7 @@ # check out how pytorch etc. handles loading in something like imagenet class LocalDataHelper: """A helper class for loading data from a directory. - + :no-index: :param schema_directory_path: The path to the directory containing the schemas diff --git a/graphdoc/graphdoc/data/mlflow_data/mlflow_data_helper.py b/graphdoc/graphdoc/data/mlflow_data/mlflow_data_helper.py index 1016999..6851650 100644 --- a/graphdoc/graphdoc/data/mlflow_data/mlflow_data_helper.py +++ b/graphdoc/graphdoc/data/mlflow_data/mlflow_data_helper.py @@ -29,7 +29,7 @@ def __init__( mlflow_tracking_password: Optional[str] = None, ): """A helper class for loading and saving models and metadata from mlflow. 
- + :no-index: :param mlflow_tracking_uri: The uri of the mlflow tracking server. diff --git a/graphdoc/graphdoc/data/parser.py b/graphdoc/graphdoc/data/parser.py index e5ec0fb..16e4f6d 100644 --- a/graphdoc/graphdoc/data/parser.py +++ b/graphdoc/graphdoc/data/parser.py @@ -31,8 +31,9 @@ class Parser: """A class for parsing and handling of GraphQL objects. - + :no-index: + """ DEFAULT_NODE_TYPES = { diff --git a/graphdoc/graphdoc/data/schema.py b/graphdoc/graphdoc/data/schema.py index fefb343..e1ef400 100644 --- a/graphdoc/graphdoc/data/schema.py +++ b/graphdoc/graphdoc/data/schema.py @@ -22,10 +22,11 @@ class SchemaCategory(str, Enum): """Schema quality categories enumeration. - + :no-index: + """ - + PERFECT = "perfect" ALMOST_PERFECT = "almost perfect" POOR_BUT_CORRECT = "poor but correct" @@ -42,10 +43,11 @@ def from_str(cls, value: str) -> Optional["SchemaCategory"]: class SchemaRating(str, Enum): """Schema quality ratings enumeration. - + :no-index: + """ - + FOUR = "4" THREE = "3" TWO = "2" @@ -64,8 +66,9 @@ def from_value(cls, value: Union[str, int]) -> Optional["SchemaRating"]: class SchemaCategoryRatingMapping: """Mapping between schema categories and ratings. - + :no-index: + """ @staticmethod @@ -105,10 +108,11 @@ def get_category(rating: SchemaRating) -> SchemaCategory: class SchemaType(str, Enum): """Schema type enumeration. - + :no-index: + """ - + FULL_SCHEMA = "full schema" TABLE_SCHEMA = "table schema" ENUM_SCHEMA = "enum schema" @@ -123,8 +127,9 @@ def from_str(cls, value: str) -> Optional["SchemaType"]: class SchemaCategoryPath(str, Enum): """Maps schema categories to their folder names. - + :no-index: + """ PERFECT = "perfect" @@ -156,10 +161,11 @@ def get_path( @dataclass class SchemaObject: """Schema object containing schema data and metadata. 
- + :no-index: + """ - + key: str category: Optional[Enum] = None rating: Optional[Enum] = None diff --git a/graphdoc/graphdoc/docs/conf.py b/graphdoc/graphdoc/docs/conf.py index 34e3df0..3004528 100644 --- a/graphdoc/graphdoc/docs/conf.py +++ b/graphdoc/graphdoc/docs/conf.py @@ -3,32 +3,36 @@ # add these directories to sys.path here. import os import sys -sys.path.insert(0, os.path.abspath('..')) + +sys.path.insert(0, os.path.abspath("..")) # -- Extension configuration ------------------------------------------------- # Set the primary domain to Python to avoid duplicate object descriptions -primary_domain = 'py' +primary_domain = "py" # Add autodoc settings to prevent duplicate warnings autodoc_default_options = { - 'members': True, - 'undoc-members': True, - 'show-inheritance': True, + "members": True, + "undoc-members": True, + "show-inheritance": True, } # Properly handle duplicates -intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} +intersphinx_mapping = {"python": ("https://docs.python.org/3", None)} + # Skip dspy.Signature classes which are causing warnings def skip_dspy_signatures(app, what, name, obj, skip, options): import inspect + if inspect.isclass(obj): # Skip classes that inherit from dspy.Signature for base in obj.__mro__: - if base.__name__ == 'Signature' and base.__module__.startswith('dspy'): + if base.__name__ == "Signature" and base.__module__.startswith("dspy"): return True return skip + def setup(app): - app.connect('autodoc-skip-member', skip_dspy_signatures) \ No newline at end of file + app.connect("autodoc-skip-member", skip_dspy_signatures) diff --git a/graphdoc/graphdoc/modules/doc_generator_module.py b/graphdoc/graphdoc/modules/doc_generator_module.py index 7a40442..ee87404 100644 --- a/graphdoc/graphdoc/modules/doc_generator_module.py +++ b/graphdoc/graphdoc/modules/doc_generator_module.py @@ -30,7 +30,7 @@ def __init__( """Initialize the DocGeneratorModule. 
A module for generating documentation for a given GraphQL schema. Schemas are decomposed and individually used to generate documentation, with a quality check after each generation. - + :no-index: signature fields are: @@ -46,7 +46,8 @@ def __init__( :param rating_threshold: The minimum rating for a generated document to be considered valid. :type rating_threshold: int - :param fill_empty_descriptions: Whether to fill empty descriptions with generated documentation. + :param fill_empty_descriptions: Whether to fill empty descriptions with + generated documentation. :type fill_empty_descriptions: bool """ diff --git a/graphdoc/graphdoc/prompts/schema_doc_generation.py b/graphdoc/graphdoc/prompts/schema_doc_generation.py index 5c4b6f0..103275e 100644 --- a/graphdoc/graphdoc/prompts/schema_doc_generation.py +++ b/graphdoc/graphdoc/prompts/schema_doc_generation.py @@ -23,7 +23,7 @@ ################### class DocGeneratorSignature(dspy.Signature): """A signature that takes a full GraphQL schema and returns a documented schema. - + :no-index: """ @@ -36,7 +36,7 @@ class DocGeneratorSignature(dspy.Signature): class DocGeneratorHelperSignature(dspy.Signature): """A signature that takes a code section that requires a transformation as well as the current description of that section and returns a new description. - + :no-index: """ @@ -49,7 +49,7 @@ class DocGeneratorHelperSignature(dspy.Signature): class BadDocGeneratorSignature(dspy.Signature): """A signature that takes a full GraphQL schema and returns a list of issues with the schema. - + :no-index: """ @@ -90,7 +90,7 @@ def doc_gen_factory( ####################### class DocGeneratorPrompt(SinglePrompt): """DocGeneratorPrompt class for generating documentation for GraphQL schemas. 
- + :no-index: """ diff --git a/graphdoc/graphdoc/prompts/schema_doc_quality.py b/graphdoc/graphdoc/prompts/schema_doc_quality.py index 653f34a..716d363 100644 --- a/graphdoc/graphdoc/prompts/schema_doc_quality.py +++ b/graphdoc/graphdoc/prompts/schema_doc_quality.py @@ -21,9 +21,9 @@ ################### class DocQualitySignature(dspy.Signature): """A signature for evaluating the quality of GraphQL schema documentation. - + :no-index: - + You are evaluating the output of an LLM program, expect hallucinations. Given a GraphQL Schema, evaluate the quality of documentation for that schema and provide a category rating. The categories are described as: @@ -36,22 +36,22 @@ class DocQualitySignature(dspy.Signature): """ # noqa: B950 database_schema: str = dspy.InputField() - category: Literal["perfect", "almost perfect", "poor but correct", "incorrect"] = ( - dspy.OutputField() - ) + category: Literal[ + "perfect", "almost perfect", "poor but correct", "incorrect" + ] = dspy.OutputField() rating: Literal[4, 3, 2, 1] = dspy.OutputField() class DocQualityDemonstrationSignature(dspy.Signature): """A signature for demonstrating good, average and bad GraphQL schema documentation quality examples. - + :no-index: - """ + """ # noqa: B950 database_schema: str = dspy.InputField() - category: Literal["perfect", "almost perfect", "poor but correct", "incorrect"] = ( - dspy.OutputField() - ) + category: Literal[ + "perfect", "almost perfect", "poor but correct", "incorrect" + ] = dspy.OutputField() rating: Literal[4, 3, 2, 1] = dspy.OutputField() @@ -84,9 +84,9 @@ def doc_quality_factory( ####################### class DocQualityPrompt(SinglePrompt): """DocQualityPrompt class for evaluating documentation quality. - + :no-index: - + This is a single prompt that can be used to evaluate the quality of the documentation for a given schema. This is a wrapper around the SinglePrompt class that implements the abstract methods. 
diff --git a/graphdoc/graphdoc/prompts/single_prompt.py b/graphdoc/graphdoc/prompts/single_prompt.py index 32be6c4..1738f1e 100644 --- a/graphdoc/graphdoc/prompts/single_prompt.py +++ b/graphdoc/graphdoc/prompts/single_prompt.py @@ -28,7 +28,7 @@ def __init__( # and tighter coupling ) -> None: """Initialize a single prompt. - + :no-index: :param prompt: The prompt to use. @@ -119,7 +119,8 @@ def compare_metrics( :type base_metrics: Any :param optimized_metrics: The metrics of the optimized model. :type optimized_metrics: Any - :param comparison_value: The value to compare the metrics on. Determines which metric is used to compare the models. + :param comparison_value: The value to compare the metrics on. + Determines which metric is used to compare the models. :type comparison_value: str :return: True if the optimized model is better than the base model, False otherwise. :rtype: bool diff --git a/graphdoc/graphdoc/train/doc_quality_trainer.py b/graphdoc/graphdoc/train/doc_quality_trainer.py index ebbbec5..7826ace 100644 --- a/graphdoc/graphdoc/train/doc_quality_trainer.py +++ b/graphdoc/graphdoc/train/doc_quality_trainer.py @@ -76,6 +76,7 @@ def evaluation_metrics(self, base_evaluation, optimized_evaluation): :type base_evaluation: Any :param optimized_evaluation: The evaluation metrics of the optimized model. :type optimized_evaluation: Any + """ base_evaluation_overall_score = base_evaluation["overall_score"] optimized_evaluation_overall_score = optimized_evaluation["overall_score"] diff --git a/graphdoc/graphdoc/train/single_prompt_trainer.py b/graphdoc/graphdoc/train/single_prompt_trainer.py index 7c6ec4d..78e69b8 100644 --- a/graphdoc/graphdoc/train/single_prompt_trainer.py +++ b/graphdoc/graphdoc/train/single_prompt_trainer.py @@ -49,6 +49,7 @@ def __init__( :type trainset: List[dspy.Example] :param evalset: The evaluation set. 
:type evalset: List[dspy.Example] + """ self.prompt = prompt self.optimizer_type = optimizer_type @@ -87,6 +88,7 @@ def evaluation_metrics(self, base_evaluation, optimized_evaluation): :type base_evaluation: Any :param optimized_evaluation: The evaluation metrics of the optimized model. :type optimized_evaluation: Any + """ pass From e40db9e8f7e5e516780512218fad9403983d2b9d Mon Sep 17 00:00:00 2001 From: denver Date: Wed, 12 Mar 2025 13:40:13 -0500 Subject: [PATCH 03/13] fix(prompts): revert to original prompts --- .../graphdoc/prompts/schema_doc_generation.py | 49 ++++++++++++++----- .../graphdoc/prompts/schema_doc_quality.py | 38 ++++++++++---- 2 files changed, 67 insertions(+), 20 deletions(-) diff --git a/graphdoc/graphdoc/prompts/schema_doc_generation.py b/graphdoc/graphdoc/prompts/schema_doc_generation.py index 103275e..e2102ba 100644 --- a/graphdoc/graphdoc/prompts/schema_doc_generation.py +++ b/graphdoc/graphdoc/prompts/schema_doc_generation.py @@ -22,10 +22,19 @@ # DSPy Signatures # ################### class DocGeneratorSignature(dspy.Signature): - """A signature that takes a full GraphQL schema and returns a documented schema. - - :no-index: """ + ### TASK: Given a GraphQL Schema, generate a precise description for the columns of the tables in the database. + + ### Requirements: + - Focus solely on confirmed details from the provided schema. + - Keep the description concise and factual. + - Exclude any speculative or additional commentary. + - DO NOT return the phrase "in the { table } table" in your description. + + ### Formatting + - Ensure that the schema maintains proper documentation formatting, as is provided. 
+ + """ # noqa: B950 database_schema: str = dspy.InputField() documented_schema: str = dspy.OutputField( @@ -34,11 +43,21 @@ class DocGeneratorSignature(dspy.Signature): class DocGeneratorHelperSignature(dspy.Signature): - """A signature that takes a code section that requires a transformation as well as - the current description of that section and returns a new description. - - :no-index: """ + ### TASK: Analyze the provided GraphQL Schema and generate detailed yet concise descriptions for each field within the database tables and enums. + + ### Requirements: + - If the field is unclear, and the documentation result is ambiguous, request additional information: "WARNING: Please provide additional information to avoid confusion". + - Utilize only the verified information from the schema to ensure accuracy. + - Descriptions should be factual, straightforward, and avoid any speculative language. + - Refrain from using the phrase "in the { table } table" within your descriptions. + - Ensure that the documentation adheres to standard schema formatting without modifying the underlying schema structure. + + ### Formatting: + - Maintain consistency with the existing documentation style and structure. + - Focus on clarity and precision to aid developers and system architects in understanding the schema's components effectively. + + """ # noqa: B950 database_schema: str = dspy.InputField() documented_schema: str = dspy.OutputField( @@ -47,11 +66,19 @@ class DocGeneratorHelperSignature(dspy.Signature): class BadDocGeneratorSignature(dspy.Signature): - """A signature that takes a full GraphQL schema and returns a list of - issues with the schema. - - :no-index: """ + ### TASK: Given a GraphQL Schema, generate intentionally incorrect documentation for the columns of the tables in the database. + + ### Requirements: + - Every table, entity, enum, etc. must have at least one column with a description that is obviosly incorrect. 
+ - The documentation must be incorrect and misleading. + - The documentation should be scattered, with only some columns having documentation. + + ### Formatting + - Ensure that the schema maintains proper documentation formatting, as is provided. + + """ # noqa: B950 + database_schema: str = dspy.InputField() documented_schema: str = dspy.OutputField( diff --git a/graphdoc/graphdoc/prompts/schema_doc_quality.py b/graphdoc/graphdoc/prompts/schema_doc_quality.py index 716d363..7ddc22a 100644 --- a/graphdoc/graphdoc/prompts/schema_doc_quality.py +++ b/graphdoc/graphdoc/prompts/schema_doc_quality.py @@ -20,11 +20,7 @@ # DSPy Signatures # ################### class DocQualitySignature(dspy.Signature): - """A signature for evaluating the quality of GraphQL schema documentation. - - :no-index: - - You are evaluating the output of an LLM program, expect hallucinations. Given a GraphQL Schema, evaluate the quality of documentation for that schema and provide a category rating. + """You are evaluating the output of an LLM program, expect hallucinations. Given a GraphQL Schema, evaluate the quality of documentation for that schema and provide a category rating. The categories are described as: - perfect (4): The documentation contains enough information so that the interpretation of the schema and its database content is completely free of ambiguity. @@ -43,9 +39,35 @@ class DocQualitySignature(dspy.Signature): class DocQualityDemonstrationSignature(dspy.Signature): - """A signature for demonstrating good, average and bad GraphQL schema documentation quality examples. + """You are evaluating the output of an LLM program, expect hallucinations. Given a GraphQL Schema, evaluate the quality of documentation for that schema and provide a category rating. + + The categories are described as: + - perfect (4): The documentation contains enough information so that the interpretation of the schema and its database content is completely free of ambiguity. 
+ perfect (4) example: + type Domain @entity { + " The namehash (id) of the parent name. References the Domain entity that is the parent of the current domain. Type: Domain " + parent: Domain + } + - almost perfect (3): The documentation is almost perfect and free from ambiguity, but there is room for improvement. + almost perfect (3) example: + type Token @entity { + " Name of the token, mirrored from the smart contract " + name: String! + } + - poor but correct (2): The documentation is poor but correct and has room for improvement due to missing information. The documentation is not incorrect. + poor but correct (2) example: + type InterestRate @entity { + "Description for column: id" + id: ID! + } + - incorrect (1): The documentation is incorrect and contains inaccurate or misleading information. Any incorrect information automatically leads to an incorrect rating, even if some correct information is present. + incorrect (1) example: + type BridgeProtocol implements Protocol @entity { + " Social Security Number of the protocol's main developer " + id: Bytes! + } + Output a number rating that corresponds to the categories described above. - :no-index: """ # noqa: B950 database_schema: str = dspy.InputField() @@ -110,10 +132,8 @@ def __init__( :param prompt: The prompt to use. Can either be a string that maps to a defined signature, as set in the doc_quality_factory, or a dspy.Signature. :type prompt: Union[str, dspy.Signature] - :param prompt_type: The type of prompt to use. :type prompt_type: Union[Literal["predict", "chain_of_thought"], Callable] - :param prompt_metric: The metric to use. Can either be a string that maps to a defined metric, as set in the doc_quality_factory, or a custom callable function. 
Function must have the signature (example: dspy.Example, prediction: From 8fa2dcc5fbb0342089c3dfd5c40c09d02356a2af Mon Sep 17 00:00:00 2001 From: denver Date: Wed, 12 Mar 2025 13:42:05 -0500 Subject: [PATCH 04/13] style: format with black - schema doc gen --- graphdoc/graphdoc/prompts/schema_doc_generation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/graphdoc/graphdoc/prompts/schema_doc_generation.py b/graphdoc/graphdoc/prompts/schema_doc_generation.py index e2102ba..ca6bfe2 100644 --- a/graphdoc/graphdoc/prompts/schema_doc_generation.py +++ b/graphdoc/graphdoc/prompts/schema_doc_generation.py @@ -79,7 +79,6 @@ class BadDocGeneratorSignature(dspy.Signature): """ # noqa: B950 - database_schema: str = dspy.InputField() documented_schema: str = dspy.OutputField( desc="The database schema with intentionally incorrect documentation, ensuring that the underlying schema is not altered." # noqa: B950 From efd526f0de5dd3ffcb3f68714b4fe88148f1f153 Mon Sep 17 00:00:00 2001 From: denver Date: Wed, 12 Mar 2025 14:19:09 -0500 Subject: [PATCH 05/13] docs: remove unused :no-index: flags --- .../data/dspy_data/dspy_data_helper.py | 6 +--- .../data/dspy_data/generation_data_helper.py | 2 -- .../data/dspy_data/quality_data_helper.py | 2 -- graphdoc/graphdoc/data/local.py | 2 -- .../data/mlflow_data/mlflow_data_helper.py | 2 -- graphdoc/graphdoc/data/parser.py | 6 +--- graphdoc/graphdoc/data/schema.py | 36 ++++--------------- .../graphdoc/modules/doc_generator_module.py | 2 -- .../graphdoc/prompts/schema_doc_generation.py | 5 +-- .../graphdoc/prompts/schema_doc_quality.py | 2 -- graphdoc/graphdoc/prompts/single_prompt.py | 2 -- 11 files changed, 9 insertions(+), 58 deletions(-) diff --git a/graphdoc/graphdoc/data/dspy_data/dspy_data_helper.py b/graphdoc/graphdoc/data/dspy_data/dspy_data_helper.py index 9bc908f..75aeb6d 100644 --- a/graphdoc/graphdoc/data/dspy_data/dspy_data_helper.py +++ b/graphdoc/graphdoc/data/dspy_data/dspy_data_helper.py @@ -20,11 +20,7 @@ class 
DspyDataHelper(ABC): - """Abstract class for creating data objects related to a given dspy.Signature. - - :no-index: - - """ + """Abstract class for creating data objects related to a given dspy.Signature.""" ####################### # Class Methods # diff --git a/graphdoc/graphdoc/data/dspy_data/generation_data_helper.py b/graphdoc/graphdoc/data/dspy_data/generation_data_helper.py index 694e928..b261d5f 100644 --- a/graphdoc/graphdoc/data/dspy_data/generation_data_helper.py +++ b/graphdoc/graphdoc/data/dspy_data/generation_data_helper.py @@ -21,8 +21,6 @@ class GenerationDataHelper(DspyDataHelper): """A helper class for creating data objects related to our Documentation Generation dspy.Signature. - :no-index: - The example signature is defined as: ``` database_schema: str = dspy.InputField() diff --git a/graphdoc/graphdoc/data/dspy_data/quality_data_helper.py b/graphdoc/graphdoc/data/dspy_data/quality_data_helper.py index db1d28b..413c623 100644 --- a/graphdoc/graphdoc/data/dspy_data/quality_data_helper.py +++ b/graphdoc/graphdoc/data/dspy_data/quality_data_helper.py @@ -21,8 +21,6 @@ class QualityDataHelper(DspyDataHelper): """A helper class for creating data objects related to our Documentation Quality dspy.Signature. - :no-index: - The example signature is defined as: .. code-block:: python diff --git a/graphdoc/graphdoc/data/local.py b/graphdoc/graphdoc/data/local.py index da48fc4..f2bb54a 100644 --- a/graphdoc/graphdoc/data/local.py +++ b/graphdoc/graphdoc/data/local.py @@ -31,8 +31,6 @@ class LocalDataHelper: """A helper class for loading data from a directory. - :no-index: - :param schema_directory_path: The path to the directory containing the schemas :type schema_directory_path: Union[str, Path] Defaults to the path to the schemas in the graphdoc package. 
diff --git a/graphdoc/graphdoc/data/mlflow_data/mlflow_data_helper.py b/graphdoc/graphdoc/data/mlflow_data/mlflow_data_helper.py index 6851650..2b14c18 100644 --- a/graphdoc/graphdoc/data/mlflow_data/mlflow_data_helper.py +++ b/graphdoc/graphdoc/data/mlflow_data/mlflow_data_helper.py @@ -30,8 +30,6 @@ def __init__( ): """A helper class for loading and saving models and metadata from mlflow. - :no-index: - :param mlflow_tracking_uri: The uri of the mlflow tracking server. :type mlflow_tracking_uri: Union[str, Path] :param mlflow_tracking_username: The username for the mlflow tracking server. diff --git a/graphdoc/graphdoc/data/parser.py b/graphdoc/graphdoc/data/parser.py index 16e4f6d..866446f 100644 --- a/graphdoc/graphdoc/data/parser.py +++ b/graphdoc/graphdoc/data/parser.py @@ -30,11 +30,7 @@ class Parser: - """A class for parsing and handling of GraphQL objects. - - :no-index: - - """ + """A class for parsing and handling of GraphQL objects.""" DEFAULT_NODE_TYPES = { DocumentNode: "full schema", diff --git a/graphdoc/graphdoc/data/schema.py b/graphdoc/graphdoc/data/schema.py index e1ef400..f0a21ed 100644 --- a/graphdoc/graphdoc/data/schema.py +++ b/graphdoc/graphdoc/data/schema.py @@ -21,11 +21,7 @@ class SchemaCategory(str, Enum): - """Schema quality categories enumeration. - - :no-index: - - """ + """Schema quality categories enumeration.""" PERFECT = "perfect" ALMOST_PERFECT = "almost perfect" @@ -42,11 +38,7 @@ def from_str(cls, value: str) -> Optional["SchemaCategory"]: class SchemaRating(str, Enum): - """Schema quality ratings enumeration. - - :no-index: - - """ + """Schema quality ratings enumeration.""" FOUR = "4" THREE = "3" @@ -65,11 +57,7 @@ def from_value(cls, value: Union[str, int]) -> Optional["SchemaRating"]: class SchemaCategoryRatingMapping: - """Mapping between schema categories and ratings. 
- - :no-index: - - """ + """Mapping between schema categories and ratings.""" @staticmethod def get_rating(category: SchemaCategory) -> SchemaRating: @@ -107,11 +95,7 @@ def get_category(rating: SchemaRating) -> SchemaCategory: class SchemaType(str, Enum): - """Schema type enumeration. - - :no-index: - - """ + """Schema type enumeration.""" FULL_SCHEMA = "full schema" TABLE_SCHEMA = "table schema" @@ -126,11 +110,7 @@ def from_str(cls, value: str) -> Optional["SchemaType"]: class SchemaCategoryPath(str, Enum): - """Maps schema categories to their folder names. - - :no-index: - - """ + """Maps schema categories to their folder names.""" PERFECT = "perfect" ALMOST_PERFECT = "almost_perfect" @@ -160,11 +140,7 @@ def get_path( @dataclass class SchemaObject: - """Schema object containing schema data and metadata. - - :no-index: - - """ + """Schema object containing schema data and metadata.""" key: str category: Optional[Enum] = None diff --git a/graphdoc/graphdoc/modules/doc_generator_module.py b/graphdoc/graphdoc/modules/doc_generator_module.py index ee87404..ee41c5b 100644 --- a/graphdoc/graphdoc/modules/doc_generator_module.py +++ b/graphdoc/graphdoc/modules/doc_generator_module.py @@ -31,8 +31,6 @@ def __init__( a given GraphQL schema. Schemas are decomposed and individually used to generate documentation, with a quality check after each generation. - :no-index: - signature fields are: - database_schema: str = dspy.InputField() - documented_schema: str = dspy.OutputField() diff --git a/graphdoc/graphdoc/prompts/schema_doc_generation.py b/graphdoc/graphdoc/prompts/schema_doc_generation.py index ca6bfe2..351f26b 100644 --- a/graphdoc/graphdoc/prompts/schema_doc_generation.py +++ b/graphdoc/graphdoc/prompts/schema_doc_generation.py @@ -115,10 +115,7 @@ def doc_gen_factory( # Single Prompt Class # ####################### class DocGeneratorPrompt(SinglePrompt): - """DocGeneratorPrompt class for generating documentation for GraphQL schemas. 
- - :no-index: - """ + """DocGeneratorPrompt class for generating documentation for GraphQL schemas.""" def __init__( self, diff --git a/graphdoc/graphdoc/prompts/schema_doc_quality.py b/graphdoc/graphdoc/prompts/schema_doc_quality.py index 7ddc22a..620d6c7 100644 --- a/graphdoc/graphdoc/prompts/schema_doc_quality.py +++ b/graphdoc/graphdoc/prompts/schema_doc_quality.py @@ -107,8 +107,6 @@ def doc_quality_factory( class DocQualityPrompt(SinglePrompt): """DocQualityPrompt class for evaluating documentation quality. - :no-index: - This is a single prompt that can be used to evaluate the quality of the documentation for a given schema. This is a wrapper around the SinglePrompt class that implements the abstract methods. diff --git a/graphdoc/graphdoc/prompts/single_prompt.py b/graphdoc/graphdoc/prompts/single_prompt.py index 1738f1e..20bafb4 100644 --- a/graphdoc/graphdoc/prompts/single_prompt.py +++ b/graphdoc/graphdoc/prompts/single_prompt.py @@ -29,8 +29,6 @@ def __init__( ) -> None: """Initialize a single prompt. - :no-index: - :param prompt: The prompt to use. :type prompt: dspy.Signature :param prompt_type: The type of prompt to use. Can be "predict" or From 3441bcfc94e940071bf30c94c3ad36700108564c Mon Sep 17 00:00:00 2001 From: denver Date: Wed, 12 Mar 2025 14:30:49 -0500 Subject: [PATCH 06/13] refactor: remove unused conf.py file and docs folder --- graphdoc/graphdoc/docs/conf.py | 38 ---------------------------------- 1 file changed, 38 deletions(-) delete mode 100644 graphdoc/graphdoc/docs/conf.py diff --git a/graphdoc/graphdoc/docs/conf.py b/graphdoc/graphdoc/docs/conf.py deleted file mode 100644 index 3004528..0000000 --- a/graphdoc/graphdoc/docs/conf.py +++ /dev/null @@ -1,38 +0,0 @@ -# -- Path setup -------------------------------------------------------------- -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. 
-import os -import sys - -sys.path.insert(0, os.path.abspath("..")) - -# -- Extension configuration ------------------------------------------------- - -# Set the primary domain to Python to avoid duplicate object descriptions -primary_domain = "py" - -# Add autodoc settings to prevent duplicate warnings -autodoc_default_options = { - "members": True, - "undoc-members": True, - "show-inheritance": True, -} - -# Properly handle duplicates -intersphinx_mapping = {"python": ("https://docs.python.org/3", None)} - - -# Skip dspy.Signature classes which are causing warnings -def skip_dspy_signatures(app, what, name, obj, skip, options): - import inspect - - if inspect.isclass(obj): - # Skip classes that inherit from dspy.Signature - for base in obj.__mro__: - if base.__name__ == "Signature" and base.__module__.startswith("dspy"): - return True - return skip - - -def setup(app): - app.connect("autodoc-skip-member", skip_dspy_signatures) From 9b1c87b9be55f19d2b1055ff7869b442d913f400 Mon Sep 17 00:00:00 2001 From: denver Date: Wed, 12 Mar 2025 14:45:29 -0500 Subject: [PATCH 07/13] refactor: clean up unused functions --- graphdoc/docs/conf.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/graphdoc/docs/conf.py b/graphdoc/docs/conf.py index 57a2551..2cb8c62 100644 --- a/graphdoc/docs/conf.py +++ b/graphdoc/docs/conf.py @@ -55,12 +55,6 @@ sys.path.insert(0, os.path.abspath("..")) -# -- Suppress specific warnings ----------------------------------------------- -# This suppresses specific warning types that we want to ignore -suppress_warnings = [ - "autodoc.duplicate_object_description", -] - # Configure autodoc settings to handle duplicate signatures autodoc_default_options = { "members": True, @@ -78,32 +72,11 @@ canonical_module_mapping = {} -def get_canonical_path(obj_module): - """Determines the canonical path for a module.""" - if obj_module in canonical_module_mapping: - return canonical_module_mapping[obj_module] - return obj_module - - 
-def process_docstring(app, what, name, obj, options, lines): - """Process docstrings to add any needed directives or modify content.""" - # Add any processing here if needed - pass - - -def process_signature(app, what, name, obj, options, signature, return_annotation): - """Process signatures to standardize them across the codebase.""" - # Add any processing here if needed - return (signature, return_annotation) - - # Configure nitpicky mode to be less strict nitpicky = False def setup(app): - app.connect("autodoc-process-docstring", process_docstring) - app.connect("autodoc-process-signature", process_signature) # Create static directory if it doesn't exist to avoid the warning if not os.path.exists(os.path.join(os.path.dirname(__file__), "_static")): os.makedirs(os.path.join(os.path.dirname(__file__), "_static")) From 7eaa4117a4f164c6b4984ef4ea7bbed5995ca2ce Mon Sep 17 00:00:00 2001 From: denver Date: Wed, 12 Mar 2025 14:45:51 -0500 Subject: [PATCH 08/13] ci: remove docs-init option from run.sh --- graphdoc/run.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/graphdoc/run.sh b/graphdoc/run.sh index 73cc0d0..26c4f46 100755 --- a/graphdoc/run.sh +++ b/graphdoc/run.sh @@ -127,7 +127,6 @@ show_help() { echo " commit Format, lint, and test the code" echo " docs-generate Generate documentation RST files" echo " docs Build the documentation" - echo " docs-init Initialize the Sphinx documentation" # train commands echo " doc-quality-train Train a document quality model" @@ -157,7 +156,6 @@ else "commit") commit_command ;; "docs-generate") docs_generate ;; "docs") docs ;; - "docs-init") docs_init ;; "doc-quality-train") doc_quality_train_command ;; "doc-generator-train") doc_generator_train_command ;; "doc-generator-eval") doc_generator_eval_command ;; From 41ebf7af766f2c256761f054cc6a67bea1524a49 Mon Sep 17 00:00:00 2001 From: denver Date: Wed, 12 Mar 2025 14:53:18 -0500 Subject: [PATCH 09/13] docs: add links.rst for dynamic imports --- 
graphdoc/docs/generate_docs.py | 2 +- graphdoc/docs/index.rst | 13 +++++++------ graphdoc/docs/links.rst | 5 +++++ 3 files changed, 13 insertions(+), 7 deletions(-) create mode 100644 graphdoc/docs/links.rst diff --git a/graphdoc/docs/generate_docs.py b/graphdoc/docs/generate_docs.py index 0d955c6..5f1c1e0 100755 --- a/graphdoc/docs/generate_docs.py +++ b/graphdoc/docs/generate_docs.py @@ -20,7 +20,7 @@ def main(): output_dir = docs_dir # Clean up existing RST files except for special ones - preserve_files = ["index.rst", "conf.py", "generate_docs.py"] + preserve_files = ["index.rst", "conf.py", "generate_docs.py", "links.rst"] for filename in os.listdir(output_dir): filepath = os.path.join(output_dir, filename) if ( diff --git a/graphdoc/docs/index.rst b/graphdoc/docs/index.rst index 3b6f486..73eee52 100644 --- a/graphdoc/docs/index.rst +++ b/graphdoc/docs/index.rst @@ -6,14 +6,15 @@ GraphDoc documentation ====================== -GraphDoc is a tool for generating GraphQL documentation given a GraphQL schema. It is coupled with tooling for interacting with -MLflow for tracking and logging. At the root of the graphdoc directory, -you will find a mlflow-manager directory that can help you setup and manage a local MLflow server. Within the graphdoc directory, you will -find a run.sh script that can help you get started with a majority of common workflows that you may want to run. Just reach open an issue, or -submit a PR, if you have any questions or feedback. Thanks! +.. include:: links.rst -License: Apache License 2.0 +`GraphDoc` is a tool for generating GraphQL documentation given a GraphQL schema. It is coupled with tooling for interacting with +`MLflow <mlflow_>`_ for tracking and logging. At the root of the `graphdoc <graphdoc_repo_>`_ directory, +you will find a `mlflow-manager <mlflow_manager_>`_ directory that can help you set up and manage a local MLflow server. 
Within the `graphdoc` directory, you will +find a `run.sh <run_script_>`_ script that can help you get started with a majority of common workflows that you may want to run. Just reach out, open an issue, or +submit a PR, if you have any questions or feedback. Thanks! +License: `Apache License 2.0`_ .. toctree:: :maxdepth: 2 diff --git a/graphdoc/docs/links.rst b/graphdoc/docs/links.rst new file mode 100644 index 0000000..e796213 --- /dev/null +++ b/graphdoc/docs/links.rst @@ -0,0 +1,5 @@ +.. _mlflow: https://mlflow.org/ +.. _graphdoc_repo: https://github.com/semiotic-ai/graphdoc +.. _mlflow_manager: https://github.com/semiotic-ai/graphdoc/tree/main/mlflow-manager +.. _run_script: https://github.com/semiotic-ai/graphdoc/blob/main/run.sh +.. _Apache License 2.0: https://www.apache.org/licenses/LICENSE-2.0 \ No newline at end of file From 2a01793d939b3f9e22a952b4c1b0444d92be96ce Mon Sep 17 00:00:00 2001 From: denver Date: Wed, 12 Mar 2025 15:02:28 -0500 Subject: [PATCH 10/13] docs: complete compare_metrics docstring --- graphdoc/graphdoc/prompts/schema_doc_generation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/graphdoc/graphdoc/prompts/schema_doc_generation.py b/graphdoc/graphdoc/prompts/schema_doc_generation.py index 351f26b..7636933 100644 --- a/graphdoc/graphdoc/prompts/schema_doc_generation.py +++ b/graphdoc/graphdoc/prompts/schema_doc_generation.py @@ -219,7 +219,10 @@ def compare_metrics( :param base_metrics: The base metrics. :type base_metrics: Any - :param optimized_metrics: The optimized metrics. :type + :param optimized_metrics: The optimized metrics. + :type optimized_metrics: Any + :param comparison_value: The value to compare. 
+ :type comparison_value: str """ if comparison_value == "overall_score": From f09e997dae9b90ed5d0aca836b85ffba6581e8ef Mon Sep 17 00:00:00 2001 From: denver Date: Wed, 12 Mar 2025 16:19:33 -0500 Subject: [PATCH 11/13] refactor: remove duplicate run.sh in graphdoc root --- graphdoc/graphdoc/run.sh | 164 --------------------------------------- 1 file changed, 164 deletions(-) delete mode 100644 graphdoc/graphdoc/run.sh diff --git a/graphdoc/graphdoc/run.sh b/graphdoc/graphdoc/run.sh deleted file mode 100644 index 044b956..0000000 --- a/graphdoc/graphdoc/run.sh +++ /dev/null @@ -1,164 +0,0 @@ -#!/bin/bash - -# development and installation commands -python_command() { - poetry run python -} - -shell_command() { - poetry shell -} - -install_command() { - poetry install --without dev -} - -dev_command() { - poetry install --with dev -} - -requirements_command() { - poetry export -f requirements.txt --without-hashes --with dev,docs --output requirements.txt -} - -format_command() { - poetry run black . -} - -docstring_format_command() { - poetry run docformatter --black --style sphinx --in-place --exclude="prompts" --recursive graphdoc/ - poetry run docformatter --black --style sphinx --in-place --recursive runners/ - poetry run docformatter --black --style sphinx --in-place --recursive tests/ -} - -pep8_check_command() { - poetry run flake8 graphdoc/ - poetry run flake8 runners/ - poetry run flake8 tests/ -} - -sort_command() { - poetry run isort . -} - -lint_command() { - poetry run pyright . -} - -test_command() { - poetry run pytest --testmon -p no:warnings -} - -commit_command() { - format_command - docstring_format_command - sort_command - lint_command - pep8_check_command - test_command - requirements_command -} - -# Documentation commands -docs() { - echo "Generating RST files..." - cd docs && python generate_docs.py - - echo "Building documentation..." 
- cd docs && make clean html - echo "Documentation built in docs/build/html" -} - -docs_init() { - echo "Initializing Sphinx documentation..." - # Remove existing docs directory if it exists - rm -rf docs - # Create fresh docs directory - mkdir -p docs - cd docs - sphinx-quickstart -q \ - -p GraphDoc \ - -a "Semiotic Labs" \ - -v 1.0 \ - -r 1.0 \ - -l en \ - --ext-autodoc \ - --ext-viewcode \ - --makefile \ - --batchfile - # Create necessary directories - mkdir -p source/_static source/_templates - echo "Sphinx documentation initialized" -} - -# train commands -doc_quality_train_command() { - poetry run python runners/train/single_prompt_trainer.py --config-path assets/configs/single_prompt_doc_quality_trainer.yaml -} - -doc_generator_train_command() { - poetry run python runners/train/single_prompt_trainer.py --config-path assets/configs/single_prompt_doc_generator_trainer.yaml -} - -# eval commands -doc_generator_eval_command() { - poetry run python runners/eval/eval_doc_generator_module.py --config-path assets/configs/single_prompt_doc_generator_module_eval.yaml -} - -# help menu -show_help() { - echo "Usage: ./nli [option]" - echo "Options:" - - # development and installation commands - echo " python Run Python" - echo " shell Run shell" - echo " install Install dependencies" - echo " dev Install dependencies with dev" - echo " requirements Generate requirements.txt" - echo " format Format the code" - echo " docstring-format Format the docstrings" - echo " pep-check Check the PEP8 compliance" - echo " lint Lint the code" - echo " test Run the tests" - echo " commit Format, lint, and test the code" - echo " docs Build the documentation" - echo " docs-init Initialize the Sphinx documentation" - - # train commands - echo " doc-quality-train Train a document quality model" - echo " doc-generator-train Train a document generator model" - - # eval commands - echo " doc-generator-eval Evaluate a document generator model" -} - -# handle command line arguments -if [ -z 
"$1" ]; then - show_help -else - case "$1" in - - # development and installation commands - "python") python_command ;; - "shell") shell_command ;; - "install") install_command ;; - "dev") dev_command ;; - "requirements") requirements_command ;; - "format") format_command ;; - "docstring-format") docstring_format_command ;; - "pep-check") pep8_check_command ;; - "lint") lint_command ;; - "test") test_command ;; - "commit") commit_command ;; - "docs") docs ;; - "docs-init") docs_init ;; - "doc-quality-train") doc_quality_train_command ;; - "doc-generator-train") doc_generator_train_command ;; - "doc-generator-eval") doc_generator_eval_command ;; - *) - echo "Usage: $0 {test|lint|format|docs|docs-init|doc-quality-train|doc-generator-train|doc-generator-eval}" - exit 1 - ;; - esac -fi \ No newline at end of file From df56323a1db7011ef9909a6de76af6620355b603 Mon Sep 17 00:00:00 2001 From: denver Date: Wed, 12 Mar 2025 16:24:55 -0500 Subject: [PATCH 12/13] ci: remove duplicate doc commands from root run.sh --- run.sh | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/run.sh b/run.sh index f860c2d..7df50d5 100755 --- a/run.sh +++ b/run.sh @@ -42,19 +42,6 @@ mlflow_teardown() { cd .. } -# Documentation commands -docs_generate() { - echo "Generating documentation RST files..." - cd graphdoc && ./run.sh docs-generate - cd .. -} - -docs_build() { - echo "Building documentation..." - cd graphdoc && ./run.sh docs - cd .. -} - # train commands doc_quality_train() { echo "Training a document quality model..." 
@@ -89,10 +76,6 @@ show_help() { echo " mlflow-setup Install mlflow-manager dependencies and run the services" echo " mlflow-teardown Teardown mlflow-manager services" - # documentation commands - echo " docs-generate Generate documentation RST files automatically" - echo " docs-build Build documentation HTML files" - # train commands echo " doc-quality-train Train a document quality model" echo " doc-generator-train Train a document generator model" From 170be1d92fc8ff74f0f805612b53c73e728c9b41 Mon Sep 17 00:00:00 2001 From: denver Date: Wed, 12 Mar 2025 16:25:59 -0500 Subject: [PATCH 13/13] ci: remove doc commands from show_help --- run.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/run.sh b/run.sh index 7df50d5..17589ad 100755 --- a/run.sh +++ b/run.sh @@ -96,10 +96,6 @@ else # make commands "mlflow-setup") mlflow_setup ;; "mlflow-teardown") mlflow_teardown ;; - - # documentation commands - "docs-generate") docs_generate ;; - "docs-build") docs_build ;; # train commands "doc-quality-train") doc_quality_train ;;