diff --git a/graphdoc/docs/.nojekyll b/graphdoc/docs/.nojekyll new file mode 100644 index 0000000..0519ecb --- /dev/null +++ b/graphdoc/docs/.nojekyll @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/graphdoc/docs/conf.py b/graphdoc/docs/conf.py index 1341699..b13eab0 100644 --- a/graphdoc/docs/conf.py +++ b/graphdoc/docs/conf.py @@ -20,6 +20,7 @@ "sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx.ext.viewcode", + "sphinx.ext.intersphinx", # Add intersphinx for better cross-referencing ] # Add Napoleon settings for Google-style docstrings @@ -53,3 +54,29 @@ import sys sys.path.insert(0, os.path.abspath("..")) + +# Default options applied to every autodoc directive +autodoc_default_options = { + "members": True, + "undoc-members": True, + "show-inheritance": True, + "member-order": "bysource", +} + +# Configure intersphinx mapping for external projects +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), +} + +# Intended to document each object only once at its canonical location (NOTE(review): not a documented Sphinx option; confirm it has any effect) +canonical_module_mapping = {} + + +# Keep nitpicky mode off (the Sphinx default) so unresolved references do not warn +nitpicky = False + + +def setup(app): + # Create static directory if it doesn't exist to avoid the warning + if not os.path.exists(os.path.join(os.path.dirname(__file__), "_static")): + os.makedirs(os.path.join(os.path.dirname(__file__), "_static")) \ No newline at end of file diff --git a/graphdoc/docs/generate_docs.py b/graphdoc/docs/generate_docs.py new file mode 100755 index 0000000..5f1c1e0 --- /dev/null +++ b/graphdoc/docs/generate_docs.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python +""" +Script to automatically generate Sphinx documentation RST files. +Run this script before building the documentation to ensure all RST files are up-to-date. 
+""" +import os +import shutil +import subprocess +import sys + + +def main(): + # Get the directory where this script is located + docs_dir = os.path.dirname(os.path.abspath(__file__)) + + # The path to the module we want to document + module_dir = os.path.abspath(os.path.join(docs_dir, "..")) + + # Where to output the rst files + output_dir = docs_dir + + # Clean up existing RST files except for special ones + preserve_files = ["index.rst", "conf.py", "generate_docs.py", "links.rst"] + for filename in os.listdir(output_dir): + filepath = os.path.join(output_dir, filename) + if ( + filename.endswith(".rst") + and filename not in preserve_files + and os.path.isfile(filepath) + ): + print(f"Removing {filepath}") + os.unlink(filepath) + + # Run sphinx-apidoc + subprocess.run( + [ + "sphinx-apidoc", + "-f", # Force overwriting of existing files + "-e", # Put documentation for each module on its own page + "-M", # Put module documentation before submodule documentation + "-o", + output_dir, # Output directory + module_dir, # Module directory + "setup.py", # Exclude these files/patterns + "*tests*", + "*venv*", + "*docs*", + ] + ) + + # Add custom content to the module RST files + customize_rst_files(output_dir) + + print("\nRST files have been generated successfully!") + print("You can now build the documentation with: cd docs && make html") + + +def customize_rst_files(output_dir): + """Add custom content to the RST files.""" + # Post-process every generated RST file except index.rst + for filename in os.listdir(output_dir): + if filename.endswith(".rst") and filename != "index.rst": + filepath = os.path.join(output_dir, filename) + with open(filepath, "r") as f: + content = f.read() + + # Add :noindex: after every :show-inheritance: option to prevent duplicate object descriptions + content = content.replace( + ":show-inheritance:", ":show-inheritance:\n :noindex:" + ) + + with open(filepath, "w") as f: + f.write(content) + + # Create index.rst only if it doesn't already exist + index_path = 
os.path.join(output_dir, "index.rst") + if not os.path.exists(index_path): + with open(index_path, "w") as f: + f.write( + """.. GraphDoc documentation master file + +Welcome to GraphDoc's documentation +================================== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + modules + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` +""" + ) + + +if __name__ == "__main__": + main() diff --git a/graphdoc/docs/graphdoc.config.rst b/graphdoc/docs/graphdoc.config.rst new file mode 100644 index 0000000..4379af5 --- /dev/null +++ b/graphdoc/docs/graphdoc.config.rst @@ -0,0 +1,8 @@ +graphdoc.config module +====================== + +.. automodule:: graphdoc.config + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.data.dspy_data.dspy_data_helper.rst b/graphdoc/docs/graphdoc.data.dspy_data.dspy_data_helper.rst new file mode 100644 index 0000000..86f9ac0 --- /dev/null +++ b/graphdoc/docs/graphdoc.data.dspy_data.dspy_data_helper.rst @@ -0,0 +1,8 @@ +graphdoc.data.dspy\_data.dspy\_data\_helper module +================================================== + +.. automodule:: graphdoc.data.dspy_data.dspy_data_helper + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.data.dspy_data.generation_data_helper.rst b/graphdoc/docs/graphdoc.data.dspy_data.generation_data_helper.rst new file mode 100644 index 0000000..0105afa --- /dev/null +++ b/graphdoc/docs/graphdoc.data.dspy_data.generation_data_helper.rst @@ -0,0 +1,8 @@ +graphdoc.data.dspy\_data.generation\_data\_helper module +======================================================== + +.. 
automodule:: graphdoc.data.dspy_data.generation_data_helper + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.data.dspy_data.quality_data_helper.rst b/graphdoc/docs/graphdoc.data.dspy_data.quality_data_helper.rst new file mode 100644 index 0000000..f594ba3 --- /dev/null +++ b/graphdoc/docs/graphdoc.data.dspy_data.quality_data_helper.rst @@ -0,0 +1,8 @@ +graphdoc.data.dspy\_data.quality\_data\_helper module +===================================================== + +.. automodule:: graphdoc.data.dspy_data.quality_data_helper + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.data.dspy_data.rst b/graphdoc/docs/graphdoc.data.dspy_data.rst index 75d31db..4173822 100644 --- a/graphdoc/docs/graphdoc.data.dspy_data.rst +++ b/graphdoc/docs/graphdoc.data.dspy_data.rst @@ -1,37 +1,18 @@ graphdoc.data.dspy\_data package ================================ -Submodules ----------- - -graphdoc.data.dspy\_data.dspy\_data\_helper module --------------------------------------------------- - -.. automodule:: graphdoc.data.dspy_data.dspy_data_helper - :members: - :undoc-members: - :show-inheritance: - -graphdoc.data.dspy\_data.generation\_data\_helper module --------------------------------------------------------- - -.. automodule:: graphdoc.data.dspy_data.generation_data_helper +.. automodule:: graphdoc.data.dspy_data :members: :undoc-members: :show-inheritance: + :noindex: -graphdoc.data.dspy\_data.quality\_data\_helper module ------------------------------------------------------ - -.. automodule:: graphdoc.data.dspy_data.quality_data_helper - :members: - :undoc-members: - :show-inheritance: +Submodules +---------- -Module contents ---------------- +.. toctree:: + :maxdepth: 4 -.. 
automodule:: graphdoc.data.dspy_data - :members: - :undoc-members: - :show-inheritance: + graphdoc.data.dspy_data.dspy_data_helper + graphdoc.data.dspy_data.generation_data_helper + graphdoc.data.dspy_data.quality_data_helper diff --git a/graphdoc/docs/graphdoc.data.helper.rst b/graphdoc/docs/graphdoc.data.helper.rst new file mode 100644 index 0000000..69f5dc6 --- /dev/null +++ b/graphdoc/docs/graphdoc.data.helper.rst @@ -0,0 +1,8 @@ +graphdoc.data.helper module +=========================== + +.. automodule:: graphdoc.data.helper + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.data.local.rst b/graphdoc/docs/graphdoc.data.local.rst new file mode 100644 index 0000000..a724702 --- /dev/null +++ b/graphdoc/docs/graphdoc.data.local.rst @@ -0,0 +1,8 @@ +graphdoc.data.local module +========================== + +.. automodule:: graphdoc.data.local + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.data.mlflow_data.mlflow_data_helper.rst b/graphdoc/docs/graphdoc.data.mlflow_data.mlflow_data_helper.rst new file mode 100644 index 0000000..4729e54 --- /dev/null +++ b/graphdoc/docs/graphdoc.data.mlflow_data.mlflow_data_helper.rst @@ -0,0 +1,8 @@ +graphdoc.data.mlflow\_data.mlflow\_data\_helper module +====================================================== + +.. automodule:: graphdoc.data.mlflow_data.mlflow_data_helper + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.data.mlflow_data.rst b/graphdoc/docs/graphdoc.data.mlflow_data.rst index 7505d93..71330b8 100644 --- a/graphdoc/docs/graphdoc.data.mlflow_data.rst +++ b/graphdoc/docs/graphdoc.data.mlflow_data.rst @@ -1,21 +1,16 @@ graphdoc.data.mlflow\_data package ================================== -Submodules ----------- - -graphdoc.data.mlflow\_data.mlflow\_data\_helper module ------------------------------------------------------- - -.. 
automodule:: graphdoc.data.mlflow_data.mlflow_data_helper +.. automodule:: graphdoc.data.mlflow_data :members: :undoc-members: :show-inheritance: + :noindex: + +Submodules +---------- -Module contents ---------------- +.. toctree:: + :maxdepth: 4 -.. automodule:: graphdoc.data.mlflow_data - :members: - :undoc-members: - :show-inheritance: + graphdoc.data.mlflow_data.mlflow_data_helper diff --git a/graphdoc/docs/graphdoc.data.parser.rst b/graphdoc/docs/graphdoc.data.parser.rst new file mode 100644 index 0000000..93dfdef --- /dev/null +++ b/graphdoc/docs/graphdoc.data.parser.rst @@ -0,0 +1,8 @@ +graphdoc.data.parser module +=========================== + +.. automodule:: graphdoc.data.parser + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.data.rst b/graphdoc/docs/graphdoc.data.rst index 9f059b8..ab4e00d 100644 --- a/graphdoc/docs/graphdoc.data.rst +++ b/graphdoc/docs/graphdoc.data.rst @@ -1,6 +1,12 @@ graphdoc.data package ===================== +.. automodule:: graphdoc.data + :members: + :undoc-members: + :show-inheritance: + :noindex: + Subpackages ----------- @@ -13,42 +19,10 @@ Subpackages Submodules ---------- -graphdoc.data.helper module ---------------------------- - -.. automodule:: graphdoc.data.helper - :members: - :undoc-members: - :show-inheritance: - -graphdoc.data.local module --------------------------- - -.. automodule:: graphdoc.data.local - :members: - :undoc-members: - :show-inheritance: - -graphdoc.data.parser module ---------------------------- - -.. automodule:: graphdoc.data.parser - :members: - :undoc-members: - :show-inheritance: - -graphdoc.data.schema module ---------------------------- - -.. automodule:: graphdoc.data.schema - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- +.. toctree:: + :maxdepth: 4 -.. 
automodule:: graphdoc.data - :members: - :undoc-members: - :show-inheritance: + graphdoc.data.helper + graphdoc.data.local + graphdoc.data.parser + graphdoc.data.schema diff --git a/graphdoc/docs/graphdoc.data.schema.rst b/graphdoc/docs/graphdoc.data.schema.rst new file mode 100644 index 0000000..6d1eeb4 --- /dev/null +++ b/graphdoc/docs/graphdoc.data.schema.rst @@ -0,0 +1,8 @@ +graphdoc.data.schema module +=========================== + +.. automodule:: graphdoc.data.schema + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.eval.doc_generator_eval.rst b/graphdoc/docs/graphdoc.eval.doc_generator_eval.rst new file mode 100644 index 0000000..46f1bcd --- /dev/null +++ b/graphdoc/docs/graphdoc.eval.doc_generator_eval.rst @@ -0,0 +1,8 @@ +graphdoc.eval.doc\_generator\_eval module +========================================= + +.. automodule:: graphdoc.eval.doc_generator_eval + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.eval.rst b/graphdoc/docs/graphdoc.eval.rst index 8b095e3..cdebde8 100644 --- a/graphdoc/docs/graphdoc.eval.rst +++ b/graphdoc/docs/graphdoc.eval.rst @@ -1,21 +1,16 @@ graphdoc.eval package ===================== -Submodules ----------- - -graphdoc.eval.doc\_generator\_eval module ------------------------------------------ - -.. automodule:: graphdoc.eval.doc_generator_eval +.. automodule:: graphdoc.eval :members: :undoc-members: :show-inheritance: + :noindex: + +Submodules +---------- -Module contents ---------------- +.. toctree:: + :maxdepth: 4 -.. automodule:: graphdoc.eval - :members: - :undoc-members: - :show-inheritance: + graphdoc.eval.doc_generator_eval diff --git a/graphdoc/docs/graphdoc.main.rst b/graphdoc/docs/graphdoc.main.rst new file mode 100644 index 0000000..7dc5c2f --- /dev/null +++ b/graphdoc/docs/graphdoc.main.rst @@ -0,0 +1,8 @@ +graphdoc.main module +==================== + +.. 
automodule:: graphdoc.main + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.modules.doc_generator_module.rst b/graphdoc/docs/graphdoc.modules.doc_generator_module.rst new file mode 100644 index 0000000..795ce3a --- /dev/null +++ b/graphdoc/docs/graphdoc.modules.doc_generator_module.rst @@ -0,0 +1,8 @@ +graphdoc.modules.doc\_generator\_module module +============================================== + +.. automodule:: graphdoc.modules.doc_generator_module + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.modules.rst b/graphdoc/docs/graphdoc.modules.rst index b8bfd19..3a3b65e 100644 --- a/graphdoc/docs/graphdoc.modules.rst +++ b/graphdoc/docs/graphdoc.modules.rst @@ -1,21 +1,16 @@ graphdoc.modules package ======================== -Submodules ----------- - -graphdoc.modules.doc\_generator\_module module ----------------------------------------------- - -.. automodule:: graphdoc.modules.doc_generator_module +.. automodule:: graphdoc.modules :members: :undoc-members: :show-inheritance: + :noindex: + +Submodules +---------- -Module contents ---------------- +.. toctree:: + :maxdepth: 4 -.. automodule:: graphdoc.modules - :members: - :undoc-members: - :show-inheritance: + graphdoc.modules.doc_generator_module diff --git a/graphdoc/docs/graphdoc.prompts.rst b/graphdoc/docs/graphdoc.prompts.rst index cf530c9..5ffc90f 100644 --- a/graphdoc/docs/graphdoc.prompts.rst +++ b/graphdoc/docs/graphdoc.prompts.rst @@ -1,37 +1,18 @@ graphdoc.prompts package ======================== -Submodules ----------- - -graphdoc.prompts.schema\_doc\_generation module ------------------------------------------------ - -.. automodule:: graphdoc.prompts.schema_doc_generation - :members: - :undoc-members: - :show-inheritance: - -graphdoc.prompts.schema\_doc\_quality module --------------------------------------------- - -.. automodule:: graphdoc.prompts.schema_doc_quality +.. 
automodule:: graphdoc.prompts :members: :undoc-members: :show-inheritance: + :noindex: -graphdoc.prompts.single\_prompt module --------------------------------------- - -.. automodule:: graphdoc.prompts.single_prompt - :members: - :undoc-members: - :show-inheritance: +Submodules +---------- -Module contents ---------------- +.. toctree:: + :maxdepth: 4 -.. automodule:: graphdoc.prompts - :members: - :undoc-members: - :show-inheritance: + graphdoc.prompts.schema_doc_generation + graphdoc.prompts.schema_doc_quality + graphdoc.prompts.single_prompt diff --git a/graphdoc/docs/graphdoc.prompts.schema_doc_generation.rst b/graphdoc/docs/graphdoc.prompts.schema_doc_generation.rst new file mode 100644 index 0000000..6c0b4bc --- /dev/null +++ b/graphdoc/docs/graphdoc.prompts.schema_doc_generation.rst @@ -0,0 +1,8 @@ +graphdoc.prompts.schema\_doc\_generation module +=============================================== + +.. automodule:: graphdoc.prompts.schema_doc_generation + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.prompts.schema_doc_quality.rst b/graphdoc/docs/graphdoc.prompts.schema_doc_quality.rst new file mode 100644 index 0000000..cecd62b --- /dev/null +++ b/graphdoc/docs/graphdoc.prompts.schema_doc_quality.rst @@ -0,0 +1,8 @@ +graphdoc.prompts.schema\_doc\_quality module +============================================ + +.. automodule:: graphdoc.prompts.schema_doc_quality + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.prompts.single_prompt.rst b/graphdoc/docs/graphdoc.prompts.single_prompt.rst new file mode 100644 index 0000000..1a614ef --- /dev/null +++ b/graphdoc/docs/graphdoc.prompts.single_prompt.rst @@ -0,0 +1,8 @@ +graphdoc.prompts.single\_prompt module +====================================== + +.. 
automodule:: graphdoc.prompts.single_prompt + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.rst b/graphdoc/docs/graphdoc.rst index 502daff..1425159 100644 --- a/graphdoc/docs/graphdoc.rst +++ b/graphdoc/docs/graphdoc.rst @@ -1,6 +1,12 @@ graphdoc package ================ +.. automodule:: graphdoc + :members: + :undoc-members: + :show-inheritance: + :noindex: + Subpackages ----------- @@ -16,26 +22,8 @@ Subpackages Submodules ---------- -graphdoc.config module ----------------------- - -.. automodule:: graphdoc.config - :members: - :undoc-members: - :show-inheritance: - -graphdoc.main module --------------------- - -.. automodule:: graphdoc.main - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- +.. toctree:: + :maxdepth: 4 -.. automodule:: graphdoc - :members: - :undoc-members: - :show-inheritance: + graphdoc.config + graphdoc.main diff --git a/graphdoc/docs/graphdoc.train.doc_generator_trainer.rst b/graphdoc/docs/graphdoc.train.doc_generator_trainer.rst new file mode 100644 index 0000000..d3d8ab9 --- /dev/null +++ b/graphdoc/docs/graphdoc.train.doc_generator_trainer.rst @@ -0,0 +1,8 @@ +graphdoc.train.doc\_generator\_trainer module +============================================= + +.. automodule:: graphdoc.train.doc_generator_trainer + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.train.doc_quality_trainer.rst b/graphdoc/docs/graphdoc.train.doc_quality_trainer.rst new file mode 100644 index 0000000..596f769 --- /dev/null +++ b/graphdoc/docs/graphdoc.train.doc_quality_trainer.rst @@ -0,0 +1,8 @@ +graphdoc.train.doc\_quality\_trainer module +=========================================== + +.. 
automodule:: graphdoc.train.doc_quality_trainer + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.train.optimizers.rst b/graphdoc/docs/graphdoc.train.optimizers.rst new file mode 100644 index 0000000..b99ad09 --- /dev/null +++ b/graphdoc/docs/graphdoc.train.optimizers.rst @@ -0,0 +1,8 @@ +graphdoc.train.optimizers module +================================ + +.. automodule:: graphdoc.train.optimizers + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/graphdoc.train.rst b/graphdoc/docs/graphdoc.train.rst index d6d3f32..f87cd03 100644 --- a/graphdoc/docs/graphdoc.train.rst +++ b/graphdoc/docs/graphdoc.train.rst @@ -1,45 +1,19 @@ graphdoc.train package ====================== -Submodules ----------- - -graphdoc.train.doc\_generator\_trainer module ---------------------------------------------- - -.. automodule:: graphdoc.train.doc_generator_trainer - :members: - :undoc-members: - :show-inheritance: - -graphdoc.train.doc\_quality\_trainer module -------------------------------------------- - -.. automodule:: graphdoc.train.doc_quality_trainer - :members: - :undoc-members: - :show-inheritance: - -graphdoc.train.optimizers module --------------------------------- - -.. automodule:: graphdoc.train.optimizers +.. automodule:: graphdoc.train :members: :undoc-members: :show-inheritance: + :noindex: -graphdoc.train.single\_prompt\_trainer module ---------------------------------------------- - -.. automodule:: graphdoc.train.single_prompt_trainer - :members: - :undoc-members: - :show-inheritance: +Submodules +---------- -Module contents ---------------- +.. toctree:: + :maxdepth: 4 -.. 
automodule:: graphdoc.train - :members: - :undoc-members: - :show-inheritance: + graphdoc.train.doc_generator_trainer + graphdoc.train.doc_quality_trainer + graphdoc.train.optimizers + graphdoc.train.single_prompt_trainer diff --git a/graphdoc/docs/graphdoc.train.single_prompt_trainer.rst b/graphdoc/docs/graphdoc.train.single_prompt_trainer.rst new file mode 100644 index 0000000..4587e46 --- /dev/null +++ b/graphdoc/docs/graphdoc.train.single_prompt_trainer.rst @@ -0,0 +1,8 @@ +graphdoc.train.single\_prompt\_trainer module +============================================= + +.. automodule:: graphdoc.train.single_prompt_trainer + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/index.rst b/graphdoc/docs/index.rst index 3db22af..73eee52 100644 --- a/graphdoc/docs/index.rst +++ b/graphdoc/docs/index.rst @@ -16,7 +16,6 @@ submit a PR, if you have any questions or feedback. Thanks! License: `Apache License 2.0 `_ - .. toctree:: :maxdepth: 2 :caption: Contents: diff --git a/graphdoc/docs/modules.rst b/graphdoc/docs/modules.rst index 06251c7..6a8e6e1 100644 --- a/graphdoc/docs/modules.rst +++ b/graphdoc/docs/modules.rst @@ -5,3 +5,4 @@ graphdoc :maxdepth: 4 graphdoc + tests diff --git a/graphdoc/docs/source/graphdoc.data.rst b/graphdoc/docs/source/graphdoc.data.rst index 9d5cba5..bebca7d 100644 --- a/graphdoc/docs/source/graphdoc.data.rst +++ b/graphdoc/docs/source/graphdoc.data.rst @@ -11,6 +11,7 @@ graphdoc.data.helper module :members: :undoc-members: :show-inheritance: + :no-index: graphdoc.data.local module -------------------------- @@ -19,6 +20,7 @@ graphdoc.data.local module :members: :undoc-members: :show-inheritance: + :no-index: graphdoc.data.parser module --------------------------- @@ -27,6 +29,7 @@ graphdoc.data.parser module :members: :undoc-members: :show-inheritance: + :no-index: graphdoc.data.schema module --------------------------- @@ -35,6 +38,7 @@ graphdoc.data.schema module :members: :undoc-members: 
:show-inheritance: + :no-index: Module contents --------------- @@ -43,3 +47,4 @@ Module contents :members: :undoc-members: :show-inheritance: + :no-index: diff --git a/graphdoc/docs/source/graphdoc.prompts.rst b/graphdoc/docs/source/graphdoc.prompts.rst index cf530c9..e6308e0 100644 --- a/graphdoc/docs/source/graphdoc.prompts.rst +++ b/graphdoc/docs/source/graphdoc.prompts.rst @@ -11,6 +11,7 @@ graphdoc.prompts.schema\_doc\_generation module :members: :undoc-members: :show-inheritance: + :no-index: graphdoc.prompts.schema\_doc\_quality module -------------------------------------------- @@ -19,6 +20,7 @@ graphdoc.prompts.schema\_doc\_quality module :members: :undoc-members: :show-inheritance: + :no-index: graphdoc.prompts.single\_prompt module -------------------------------------- @@ -27,6 +29,7 @@ graphdoc.prompts.single\_prompt module :members: :undoc-members: :show-inheritance: + :no-index: Module contents --------------- @@ -35,3 +38,4 @@ Module contents :members: :undoc-members: :show-inheritance: + :no-index: diff --git a/graphdoc/docs/source/graphdoc.rst b/graphdoc/docs/source/graphdoc.rst index aae48c1..bd10c9e 100644 --- a/graphdoc/docs/source/graphdoc.rst +++ b/graphdoc/docs/source/graphdoc.rst @@ -20,6 +20,7 @@ graphdoc.main module :members: :undoc-members: :show-inheritance: + :no-index: Module contents --------------- @@ -28,3 +29,4 @@ Module contents :members: :undoc-members: :show-inheritance: + :no-index: diff --git a/graphdoc/docs/source/modules.rst b/graphdoc/docs/source/modules.rst deleted file mode 100644 index 06251c7..0000000 --- a/graphdoc/docs/source/modules.rst +++ /dev/null @@ -1,7 +0,0 @@ -graphdoc -======== - -.. toctree:: - :maxdepth: 4 - - graphdoc diff --git a/graphdoc/docs/tests.conftest.rst b/graphdoc/docs/tests.conftest.rst new file mode 100644 index 0000000..6755d2b --- /dev/null +++ b/graphdoc/docs/tests.conftest.rst @@ -0,0 +1,8 @@ +tests.conftest module +===================== + +.. 
automodule:: tests.conftest + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/tests.rst b/graphdoc/docs/tests.rst new file mode 100644 index 0000000..c201516 --- /dev/null +++ b/graphdoc/docs/tests.rst @@ -0,0 +1,18 @@ +tests package +============= + +.. automodule:: tests + :members: + :undoc-members: + :show-inheritance: + :noindex: + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + tests.conftest + tests.test_confest + tests.test_config diff --git a/graphdoc/docs/tests.test_confest.rst b/graphdoc/docs/tests.test_confest.rst new file mode 100644 index 0000000..c13b703 --- /dev/null +++ b/graphdoc/docs/tests.test_confest.rst @@ -0,0 +1,8 @@ +tests.test\_confest module +========================== + +.. automodule:: tests.test_confest + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/docs/tests.test_config.rst b/graphdoc/docs/tests.test_config.rst new file mode 100644 index 0000000..97c1aa6 --- /dev/null +++ b/graphdoc/docs/tests.test_config.rst @@ -0,0 +1,8 @@ +tests.test\_config module +========================= + +.. automodule:: tests.test_config + :members: + :undoc-members: + :show-inheritance: + :noindex: diff --git a/graphdoc/graphdoc/config.py b/graphdoc/graphdoc/config.py index 9f4dcd0..afe813a 100644 --- a/graphdoc/graphdoc/config.py +++ b/graphdoc/graphdoc/config.py @@ -35,23 +35,31 @@ def mlflow_data_helper_from_dict(mlflow_config: dict) -> MlflowDataHelper: - """Load a mlflow data helper from a dictionary of parameters. + """Load an MLflow data helper from a dictionary of parameters. - :param mlflow_config: Dictionary containing mlflow parameters. - :type mlflow_config: dict + The following keys are expected: + - mlflow_tracking_uri + - mlflow_tracking_username (optional) + - mlflow_tracking_password (optional) + + .. code-block:: python - .. 
code-block:: json { "mlflow_tracking_uri": "http://localhost:5000", "mlflow_tracking_username": "admin", "mlflow_tracking_password": "password" } + :param mlflow_config: Dictionary containing MLflow parameters. + :type mlflow_config: dict + :return: A MlflowDataHelper object. + :rtype: MlflowDataHelper + """ return MlflowDataHelper( mlflow_tracking_uri=mlflow_config["mlflow_tracking_uri"], - mlflow_tracking_username=mlflow_config["mlflow_tracking_username"], - mlflow_tracking_password=mlflow_config["mlflow_tracking_password"], + mlflow_tracking_username=mlflow_config.get("mlflow_tracking_username", None), + mlflow_tracking_password=mlflow_config.get("mlflow_tracking_password", None), ) @@ -62,6 +70,7 @@ def mlflow_data_helper_from_yaml(yaml_path: Union[str, Path]) -> MlflowDataHelpe :type yaml_path: Union[str, Path] .. code-block:: yaml + mlflow: mlflow_tracking_uri: !env MLFLOW_TRACKING_URI # The tracking URI for MLflow mlflow_tracking_username: !env MLFLOW_TRACKING_USERNAME # The username for the mlflow tracking server @@ -81,6 +90,7 @@ def trainset_from_dict(trainset_dict: dict) -> List[dspy.Example]: """Load a trainset from a dictionary of parameters. .. code-block:: yaml + { "hf_api_key": !env HF_DATASET_KEY, # Must be a valid Hugging # Face API key @@ -151,6 +161,7 @@ def trainset_from_yaml(yaml_path: Union[str, Path]) -> List[dspy.Example]: """Load a trainset from a YAML file. .. code-block:: yaml + data: hf_api_key: !env HF_DATASET_KEY # Must be a valid Hugging Face API key # (with permission to access graphdoc) @@ -189,10 +200,12 @@ def split_trainset( ) -> tuple[List[dspy.Example], List[dspy.Example]]: """Split a trainset into a trainset and evalset. - :param trainset: The trainset to split. :type trainset: List[dspy.Example] - :param evalset_ratio: The proportionate size of the evalset. :type - evalset_ratio: float :return: A tuple of trainset and evalset. :rtype: - tuple[List[dspy.Example], List[dspy.Example]] + :param trainset: The trainset to split. 
+ :type trainset: List[dspy.Example] + :param evalset_ratio: The proportionate size of the evalset. + :type evalset_ratio: float + :return: A tuple of trainset and evalset. + :rtype: tuple[List[dspy.Example], List[dspy.Example]] """ random.seed(seed) @@ -209,6 +222,7 @@ def trainset_and_evalset_from_yaml( """Load a trainset and evalset from a YAML file. .. code-block:: yaml + data: hf_api_key: !env HF_DATASET_KEY # Must be a valid Hugging Face API key # (with permission to access graphdoc) @@ -253,28 +267,25 @@ def single_prompt_from_dict( ) -> SinglePrompt: """Load a single prompt from a dictionary of parameters. - .. code-block:: json + .. code-block:: python + { "prompt": "doc_quality", # Which prompt signature to use "class": "SchemaDocQualityPrompt", # Must be a child of SinglePrompt - "type": "predict", # The type of prompt to use - # (predict, chain_of_thought) - "metric": "rating", # The type of metric to use - # (rating, category) - "load_from_mlflow": false, # Whether to load the prompt from an MLFlow URI + "type": "predict", # Must be one of predict, generate + "metric": "rating", # The metric to use for evaluation + "load_from_mlflow": false, # Whether to load the prompt from MLflow "model_uri": null, # The tracking URI for MLflow "model_name": null, # The name of the model in MLflow "model_version": null # The version of the model in MLflow - "prompt_metric": False # Whether another prompt is used - # to calculate the metric - # (in which case we must also load that prompt) } - :param prompt_dict: Dictionary containing prompt information. + :param prompt_dict: Dictionary containing prompt parameters. :type prompt_dict: dict - :param prompt_metric: The metric to use to calculate the metric. - Can be another prompt signature or a string. + :param prompt_metric: The prompt to use for the metric. :type prompt_metric: Union[str, SinglePrompt] + :param mlflow_dict: Dictionary containing MLflow parameters. 
+ :type mlflow_dict: Optional[dict] :return: A SinglePrompt object. :rtype: SinglePrompt @@ -307,6 +318,7 @@ def single_prompt_from_yaml(yaml_path: Union[str, Path]) -> SinglePrompt: """Load a single prompt from a YAML file. .. code-block:: yaml + prompt: prompt: base_doc_gen # Which prompt signature to use class: DocGeneratorPrompt # Must be a child of SinglePrompt @@ -367,7 +379,8 @@ def single_trainer_from_dict( ) -> SinglePromptTrainer: """Load a single trainer from a dictionary of parameters. - .. code-block:: json + .. code-block:: python + { "trainer": { "class": "DocQualityTrainer", @@ -418,6 +431,7 @@ def single_trainer_from_yaml(yaml_path: Union[str, Path]) -> SinglePromptTrainer """Load a single prompt trainer from a YAML file. .. code-block:: yaml + trainer: hf_api_key: !env HF_DATASET_KEY # Must be a valid Hugging Face API key # (with permission to access graphdoc) @@ -450,21 +464,6 @@ def single_trainer_from_yaml(yaml_path: Union[str, Path]) -> SinglePromptTrainer model_version: null # The version of the model in MLflow prompt_metric: true # Whether another prompt is used # to calculate the metric - # (in which case we must load prompt) - - prompt_metric: - prompt: doc_quality # The prompt to use to calculate the metric - class: DocQualityPrompt # The class of the prompt to use - # to calculate the metric - type: predict # The type of prompt to use - # to calculate the metric - metric: rating # The metric to use to calculate - # the metric - load_from_mlflow: false # Whether to load the prompt - # from an MLFlow URI - model_uri: null # The tracking URI for MLflow - model_name: null # The name of the model in MLflow - model_version: null # The version of the model in MLflow :param yaml_path: Path to the YAML file. 
:type yaml_path: Union[str, Path] @@ -488,9 +487,10 @@ def single_trainer_from_yaml(yaml_path: Union[str, Path]) -> SinglePromptTrainer def doc_generator_module_from_dict( module_dict: dict, prompt: Union[DocGeneratorPrompt, SinglePrompt] ) -> DocGeneratorModule: - """Load a doc generator module from a dictionary of parameters. + """Load a single doc generator module from a dictionary of parameters. + + .. code-block:: python - .. code-block:: json { "retry": true, "retry_limit": 1, @@ -519,6 +519,7 @@ def doc_generator_module_from_yaml(yaml_path: Union[str, Path]) -> DocGeneratorM """Load a doc generator module from a YAML file. .. code-block:: yaml + prompt: prompt: base_doc_gen # Which prompt signature to use class: DocGeneratorPrompt # Must be a child of SinglePrompt @@ -575,6 +576,7 @@ def doc_generator_eval_from_yaml(yaml_path: Union[str, Path]) -> DocGeneratorEva """Load a doc generator evaluator from a YAML file. .. code-block:: yaml + mlflow: mlflow_tracking_uri: !env MLFLOW_TRACKING_URI # The tracking URI for MLflow mlflow_tracking_username: !env MLFLOW_TRACKING_USERNAME # The username for the mlflow tracking server @@ -630,7 +632,7 @@ def doc_generator_eval_from_yaml(yaml_path: Union[str, Path]) -> DocGeneratorEva # load the eval config mdh = mlflow_data_helper_from_yaml(yaml_path) # noqa: F841 - mlflow_tracking_uri = config["mlflow"]["mlflow_tracking_uri"] + mlflow_tracking_uri = config["eval"]["mlflow_tracking_uri"] mlflow_experiment_name = config["eval"]["mlflow_experiment_name"] generator_prediction_field = config["eval"]["generator_prediction_field"] evaluator_prediction_field = config["eval"]["evaluator_prediction_field"] diff --git a/graphdoc/graphdoc/data/dspy_data/quality_data_helper.py b/graphdoc/graphdoc/data/dspy_data/quality_data_helper.py index 3625faa..413c623 100644 --- a/graphdoc/graphdoc/data/dspy_data/quality_data_helper.py +++ b/graphdoc/graphdoc/data/dspy_data/quality_data_helper.py @@ -22,13 +22,14 @@ class 
QualityDataHelper(DspyDataHelper): dspy.Signature. The example signature is defined as: - ``` - database_schema: str = dspy.InputField() - category: Literal["perfect", "almost perfect", "poor but correct", "incorrect"] = ( - dspy.OutputField() - ) - rating: Literal[4, 3, 2, 1] = dspy.OutputField() - ``` + + .. code-block:: python + + database_schema: str = dspy.InputField() + category: Literal["perfect", "almost perfect", "poor but correct", "incorrect"] = ( + dspy.OutputField() + ) + rating: Literal[4, 3, 2, 1] = dspy.OutputField() """ diff --git a/graphdoc/graphdoc/data/helper.py b/graphdoc/graphdoc/data/helper.py index 2c3887d..00f3a7b 100644 --- a/graphdoc/graphdoc/data/helper.py +++ b/graphdoc/graphdoc/data/helper.py @@ -39,8 +39,11 @@ def check_directory_path(directory_path: Union[str, Path]) -> None: def check_file_path(file_path: Union[str, Path]) -> None: """Check if the provided path resolves to a valid file. - :param file_path: The path to check. :type file_path: Union[str, Path] :raises - ValueError: If the path does not resolve to a valid file. :return: None :rtype: None + :param file_path: The path to check. + :type file_path: Union[str, Path] + :raises ValueError: If the path does not resolve to a valid file. + :return: None + :rtype: None """ _file_path = Path(file_path).resolve() diff --git a/graphdoc/graphdoc/data/local.py b/graphdoc/graphdoc/data/local.py index 4072177..f2bb54a 100644 --- a/graphdoc/graphdoc/data/local.py +++ b/graphdoc/graphdoc/data/local.py @@ -68,10 +68,14 @@ def schema_objects_from_folder( ) -> dict[str, SchemaObject]: """Load schemas from a folder, keeping the difficulty tag. 
- :param folder_path: The path to the folder containing the schemas :type - folder_path: Union[str, Path] :param category: The category of the schemas :type - category: str :param rating: The rating of the schemas :type rating: int - :return: A dictionary of schemas :rtype: dict[str, SchemaObject] + :param category: The category of the schemas + :type category: str + :param rating: The rating of the schemas + :type rating: int + :param folder_path: The path to the folder containing the schemas + :type folder_path: Union[str, Path] + :return: A dictionary of schemas + :rtype: dict[str, SchemaObject] """ check_directory_path(folder_path) @@ -147,11 +151,16 @@ def folder_to_dataset( ) -> Dataset: """Load a folder of schemas, keeping the difficulty tag. - :param category: The category of the schemas :type category: str :param - folder_path: The path to the folder containing the schemas :type folder_path: - Union[str, Path] :param parse_objects: Whether to parse the objects from the - schemas :type parse_objects: bool :param type_mapping: A dictionary mapping - types to strings :type type_mapping: Optional[dict[type, str]] + :param category: The category of the schemas + :type category: str + :param folder_path: The path to the folder containing the schemas + :type folder_path: Union[str, Path] + :param parse_objects: Whether to parse the objects from the schemas + :type parse_objects: bool + :param type_mapping: A dictionary mapping types to strings + :type type_mapping: Optional[dict[type, str]] + :return: A dataset containing the schemas + :rtype: Dataset """ objects = [] @@ -183,10 +192,14 @@ def folder_of_folders_to_dataset( """Load a folder of folders containing schemas, keeping the difficulty tag. :param folder_paths: Enum class defining folder paths, defaults to - SchemaCategoryPath. Must have a get_path method. 
:type folder_paths: Type[Enum] - :param parse_objects: Whether to parse the objects from the schemas :type - parse_objects: bool :param type_mapping: A dictionary mapping graphql-ast node - values to strings + SchemaCategoryPath. Must have a get_path method. + :type folder_paths: Type[Enum] + :param parse_objects: Whether to parse the objects from the schemas + :type parse_objects: bool + :param type_mapping: A dictionary mapping graphql-ast node values to strings + :type type_mapping: Optional[dict[type, str]] + :return: A dataset containing the schemas + :rtype: Dataset """ schema_objects = self.schema_objects_from_folder_of_folders( diff --git a/graphdoc/graphdoc/data/mlflow_data/mlflow_data_helper.py b/graphdoc/graphdoc/data/mlflow_data/mlflow_data_helper.py index 161e989..2b14c18 100644 --- a/graphdoc/graphdoc/data/mlflow_data/mlflow_data_helper.py +++ b/graphdoc/graphdoc/data/mlflow_data/mlflow_data_helper.py @@ -30,11 +30,12 @@ def __init__( ): """A helper class for loading and saving models and metadata from mlflow. - :param mlflow_tracking_uri: The uri of the mlflow tracking server. :type - mlflow_tracking_uri: Union[str, Path] :param mlflow_tracking_username: The - username for the mlflow tracking server. :type mlflow_tracking_username: str + :param mlflow_tracking_uri: The uri of the mlflow tracking server. + :type mlflow_tracking_uri: Union[str, Path] + :param mlflow_tracking_username: The username for the mlflow tracking server. + :type mlflow_tracking_username: Optional[str] :param mlflow_tracking_password: The password for the mlflow tracking server. - :type mlflow_tracking_password: str + :type mlflow_tracking_password: Optional[str] """ self.mlflow_tracking_uri = mlflow_tracking_uri @@ -90,8 +91,10 @@ def latest_model_version(self, model_name: str): def model_by_name_and_version(self, model_name: str, model_version: str): """Load a model from mlflow by name and version. - :param model_name: The name of the model to load. 
:type model_name: str :param - model_version: The version of the model to load. :type model_version: str + :param model_name: The name of the model to load. + :type model_name: str + :param model_version: The version of the model to load. + :type model_version: str :return: The loaded model. """ @@ -139,10 +142,12 @@ def save_model( ): """Save a model to mlflow. - :param model: The model to save. :type model: dspy.Signature :param - model_signature: The signature of the model. :type model_signature: - ModelSignature :param model_name: The name of the model to save. :type - model_name: str + :param model: The model to save. + :type model: dspy.Signature + :param model_signature: The signature of the model. + :type model_signature: ModelSignature + :param model_name: The name of the model to save. + :type model_name: str """ mlflow.dspy.log_model( diff --git a/graphdoc/graphdoc/data/parser.py b/graphdoc/graphdoc/data/parser.py index 2a53a71..866446f 100644 --- a/graphdoc/graphdoc/data/parser.py +++ b/graphdoc/graphdoc/data/parser.py @@ -48,9 +48,12 @@ def _check_node_type( ) -> str: """Check the type of a schema node. - :param node: The schema node to check :type node: Node :param type_mapping: - Custom mapping of node types to strings. Defaults to DEFAULT_NODE_TYPES :type - type_mapping: Optional[dict[type, str]] :return: The type of the schema node + :param node: The schema node to check + :type node: Node + :param type_mapping: Custom mapping of node types to strings. Defaults to + DEFAULT_NODE_TYPES + :type type_mapping: Optional[dict[type, str]] + :return: The type of the schema node :rtype: str """ @@ -65,11 +68,10 @@ def parse_schema_from_file( ) -> DocumentNode: """Parse a schema from a file. 
- :param schema_file: The name of the schema file :type schema_ - file: - str + :param schema_file: The name of the schema file + :type schema_file: Union[str, Path] :param schema_directory_path: A path to a directory containing schemas - :type schema_directory_path: str + :type schema_directory_path: Optional[Union[str, Path]] :return: The parsed schema :rtype: DocumentNode :raises Exception: If the schema cannot be parsed @@ -183,12 +185,18 @@ def fill_empty_descriptions( with the new column or table value. Do not update descriptions that already have a value. Default values are provided for the new column and table descriptions. - :param node: The GraphQL node to update :type node: Node :param - new_column_value: The new column description value :type new_column_value: str - :param new_table_value: The new table description value :type new_table_value: - str :param use_value_name: Whether to use the value name in the description - :type use_value_name: bool :param value_name: The name of the value :type - value_name: Optional[str] :return: The updated node :rtype: Node + :param node: The GraphQL node to update + :type node: Node + :param new_column_value: The new column description value + :type new_column_value: str + :param new_table_value: The new table description value + :type new_table_value: str + :param use_value_name: Whether to use the value name in the description + :type use_value_name: bool + :param value_name: The name of the value + :type value_name: Optional[str] + :return: The updated node + :rtype: Node """ if hasattr(node, "description"): # and node.description == None: @@ -267,9 +275,12 @@ def schema_equality_check(gold_node: Node, check_node: Node) -> bool: """A method to check if two schema nodes are equal. Only checks that the schemas structures are equal, not the descriptions. 
- :param gold_node: The gold standard schema node :type gold_node: Node :param - check_node: The schema node to check :type check_node: Node :return: Whether the - schemas are equal :rtype: bool + :param gold_node: The gold standard schema node + :type gold_node: Node + :param check_node: The schema node to check + :type check_node: Node + :return: Whether the schemas are equal + :rtype: bool """ gold_node_copy = copy.deepcopy(gold_node) @@ -314,10 +325,13 @@ def parse_objects_from_full_schema_object( ) -> Union[dict[str, SchemaObject], None]: """Parse out all available tables from a full schema object. - :param schema: The full schema object to parse :type schema: SchemaObject :param - type_mapping: Custom mapping of node types to strings. Defaults to - DEFAULT_NODE_TYPES :type type_mapping: Optional[dict[type, str]] :return: The - parsed objects (tables and enums) :rtype: Union[dict, None] + :param schema: The full schema object to parse + :type schema: SchemaObject + :param type_mapping: Custom mapping of node types to strings. 
Defaults to + DEFAULT_NODE_TYPES + :type type_mapping: Optional[dict[type, str]] + :return: The parsed objects (tables and enums) + :rtype: Union[dict, None] """ if schema.schema_ast is None: diff --git a/graphdoc/graphdoc/data/schema.py b/graphdoc/graphdoc/data/schema.py index 2d49810..f0a21ed 100644 --- a/graphdoc/graphdoc/data/schema.py +++ b/graphdoc/graphdoc/data/schema.py @@ -21,6 +21,8 @@ class SchemaCategory(str, Enum): + """Schema quality categories enumeration.""" + PERFECT = "perfect" ALMOST_PERFECT = "almost perfect" POOR_BUT_CORRECT = "poor but correct" @@ -36,6 +38,8 @@ def from_str(cls, value: str) -> Optional["SchemaCategory"]: class SchemaRating(str, Enum): + """Schema quality ratings enumeration.""" + FOUR = "4" THREE = "3" TWO = "2" @@ -53,7 +57,7 @@ def from_value(cls, value: Union[str, int]) -> Optional["SchemaRating"]: class SchemaCategoryRatingMapping: - """Maps SchemaCategory to SchemaRating.""" + """Mapping between schema categories and ratings.""" @staticmethod def get_rating(category: SchemaCategory) -> SchemaRating: @@ -91,6 +95,8 @@ def get_category(rating: SchemaRating) -> SchemaCategory: class SchemaType(str, Enum): + """Schema type enumeration.""" + FULL_SCHEMA = "full schema" TABLE_SCHEMA = "table schema" ENUM_SCHEMA = "enum schema" @@ -134,6 +140,8 @@ def get_path( @dataclass class SchemaObject: + """Schema object containing schema data and metadata.""" + key: str category: Optional[Enum] = None rating: Optional[Enum] = None diff --git a/graphdoc/graphdoc/modules/doc_generator_module.py b/graphdoc/graphdoc/modules/doc_generator_module.py index 1b27f42..ee41c5b 100644 --- a/graphdoc/graphdoc/modules/doc_generator_module.py +++ b/graphdoc/graphdoc/modules/doc_generator_module.py @@ -44,6 +44,9 @@ def __init__( :param rating_threshold: The minimum rating for a generated document to be considered valid. :type rating_threshold: int + :param fill_empty_descriptions: Whether to fill empty descriptions with + generated documentation. 
+ :type fill_empty_descriptions: bool """ super().__init__() @@ -107,8 +110,10 @@ def _retry_by_rating(self, database_schema: str) -> str: """Retry the generation if the quality check fails. Rating threshold is determined at initialization. - :param database_schema: The database schema to generate documentation for. :type - database_schema: str :return: The generated documentation. :rtype: str + :param database_schema: The database schema to generate documentation for. + :type database_schema: str + :return: The generated documentation. + :rtype: str """ @@ -239,9 +244,10 @@ def forward(self, database_schema: str) -> dspy.Prediction: """Given a database schema, generate a documented schema. If retry is True, the generation will be retried if the quality check fails. - :param database_schema: The database schema to generate documentation for. :type - database_schema: str :return: The generated documentation. :rtype: - dspy.Prediction + :param database_schema: The database schema to generate documentation for. + :type database_schema: str + :return: The generated documentation. + :rtype: dspy.Prediction """ if self.retry: @@ -261,9 +267,10 @@ def document_full_schema( """Given a database schema, parse out the underlying components and document on a per-component basis. - :param database_schema: The database schema to generate documentation for. :type - database_schema: str :return: The generated documentation. :rtype: - dspy.Prediction + :param database_schema: The database schema to generate documentation for. + :type database_schema: str + :return: The generated documentation. 
+ :rtype: dspy.Prediction """ # if we are tracing, make sure make sure we have everything needed to log to mlflow diff --git a/graphdoc/graphdoc/prompts/__init__.py b/graphdoc/graphdoc/prompts/__init__.py index f4aa328..48738b9 100644 --- a/graphdoc/graphdoc/prompts/__init__.py +++ b/graphdoc/graphdoc/prompts/__init__.py @@ -38,13 +38,16 @@ def single_prompt( """Returns an instance of the specified prompt class. Allows for the user to pass in their own dspy signature. - :param prompt: The prompt to use. :type prompt: Union[str, dspy.Signature] - :param prompt_class: The class of the prompt to use. :type prompt_class: str - :param prompt_type: The type of the prompt to use. :type prompt_type: str :param - prompt_metric: The metric to use for the prompt. :type prompt_metric: Union[str, - DocQualityPrompt, SinglePrompt] :return: An instance of the specified prompt - class. :rtype: SinglePrompt - + :param prompt: The prompt to use. + :type prompt: Union[str, dspy.Signature] + :param prompt_class: The class of the prompt to use. + :type prompt_class: str + :param prompt_type: The type of the prompt to use. + :type prompt_type: str + :param prompt_metric: The metric to use for the prompt. + :type prompt_metric: Union[str, DocQualityPrompt, SinglePrompt] + :return: An instance of the specified prompt class. 
+ :rtype: SinglePrompt """ prompt_classes = { "DocQualityPrompt": DocQualityPrompt, diff --git a/graphdoc/graphdoc/prompts/schema_doc_generation.py b/graphdoc/graphdoc/prompts/schema_doc_generation.py index ef70ff5..7636933 100644 --- a/graphdoc/graphdoc/prompts/schema_doc_generation.py +++ b/graphdoc/graphdoc/prompts/schema_doc_generation.py @@ -115,6 +115,8 @@ def doc_gen_factory( # Single Prompt Class # ####################### class DocGeneratorPrompt(SinglePrompt): + """DocGeneratorPrompt class for generating documentation for GraphQL schemas.""" + def __init__( self, prompt: Union[str, dspy.Signature, dspy.SignatureMeta], @@ -218,7 +220,10 @@ def compare_metrics( :param base_metrics: The base metrics. :type base_metrics: Any :param optimized_metrics: The optimized metrics. - :type + :type optimized_metrics: Any + :param comparison_value: The value to compare. + :type comparison_value: str + """ if comparison_value == "overall_score": return optimized_metrics.get("overall_score", 0) > base_metrics.get( diff --git a/graphdoc/graphdoc/prompts/schema_doc_quality.py b/graphdoc/graphdoc/prompts/schema_doc_quality.py index a975196..620d6c7 100644 --- a/graphdoc/graphdoc/prompts/schema_doc_quality.py +++ b/graphdoc/graphdoc/prompts/schema_doc_quality.py @@ -20,8 +20,7 @@ # DSPy Signatures # ################### class DocQualitySignature(dspy.Signature): - """ - You are evaluating the output of an LLM program, expect hallucinations. Given a GraphQL Schema, evaluate the quality of documentation for that schema and provide a category rating. + """You are evaluating the output of an LLM program, expect hallucinations. Given a GraphQL Schema, evaluate the quality of documentation for that schema and provide a category rating. The categories are described as: - perfect (4): The documentation contains enough information so that the interpretation of the schema and its database content is completely free of ambiguity. 
@@ -33,15 +32,14 @@ class DocQualitySignature(dspy.Signature): """ # noqa: B950 database_schema: str = dspy.InputField() - category: Literal["perfect", "almost perfect", "poor but correct", "incorrect"] = ( - dspy.OutputField() - ) + category: Literal[ + "perfect", "almost perfect", "poor but correct", "incorrect" + ] = dspy.OutputField() rating: Literal[4, 3, 2, 1] = dspy.OutputField() class DocQualityDemonstrationSignature(dspy.Signature): - """ - You are evaluating the output of an LLM program, expect hallucinations. Given a GraphQL Schema, evaluate the quality of documentation for that schema and provide a category rating. + """You are evaluating the output of an LLM program, expect hallucinations. Given a GraphQL Schema, evaluate the quality of documentation for that schema and provide a category rating. The categories are described as: - perfect (4): The documentation contains enough information so that the interpretation of the schema and its database content is completely free of ambiguity. @@ -73,9 +71,9 @@ class DocQualityDemonstrationSignature(dspy.Signature): """ # noqa: B950 database_schema: str = dspy.InputField() - category: Literal["perfect", "almost perfect", "poor but correct", "incorrect"] = ( - dspy.OutputField() - ) + category: Literal[ + "perfect", "almost perfect", "poor but correct", "incorrect" + ] = dspy.OutputField() rating: Literal[4, 3, 2, 1] = dspy.OutputField() @@ -107,6 +105,13 @@ def doc_quality_factory( # Single Prompt Class # ####################### class DocQualityPrompt(SinglePrompt): + """DocQualityPrompt class for evaluating documentation quality. + + This is a single prompt that can be used to evaluate the quality of the documentation + for a given schema. This is a wrapper around the SinglePrompt class that implements + the abstract methods. 
+ """ + def __init__( self, prompt: Union[ @@ -120,20 +125,18 @@ def __init__( prompt_metric: Union[Literal["rating", "category"], Callable] = "rating", ) -> None: # TODO: we should think about if we want to add checks on any provided dspy.Signature - """Initialize the DocQualityPrompt. This is a single prompt that can be used to - evaluate the quality of the documentation for a given schema. This is a wrapper - around the SinglePrompt class that implements the abstract methods. + """Initialize the DocQualityPrompt. :param prompt: The prompt to use. Can either be a string that maps to a defined - signature, as set in the doc_quality_factory, or a dspy.Signature. :type prompt: - Union[str, dspy.Signature] :param prompt_type: The type of prompt to use. :type - prompt_type: Union[Literal["predict", "chain_of_thought"], Callable] :param - prompt_metric: The metric to use. Can either be a string that maps to a defined - metric, as set in the doc_quality_factory, or a custom callable function. - Function must have the signature (example: dspy.Example, prediction: - dspy.Prediction) -> bool. :type prompt_metric: Union[Literal["rating", - "category"], Callable] - + signature, as set in the doc_quality_factory, or a dspy.Signature. + :type prompt: Union[str, dspy.Signature] + :param prompt_type: The type of prompt to use. + :type prompt_type: Union[Literal["predict", "chain_of_thought"], Callable] + :param prompt_metric: The metric to use. Can either be a string that maps to a defined + metric, as set in the doc_quality_factory, or a custom callable function. + Function must have the signature (example: dspy.Example, prediction: + dspy.Prediction) -> bool. + :type prompt_metric: Union[Literal["rating", "category"], Callable] """ prompt_signature = doc_quality_factory(prompt) super().__init__( @@ -163,12 +166,14 @@ def evaluate_metric( ) -> bool: """Evaluate the metric for the given example and prediction. - :param example: The example to evaluate the metric on. 
:type example: - dspy.Example :param prediction: The prediction to evaluate the metric on. :type - prediction: dspy.Prediction :param trace: Used for DSPy. :type trace: Any + :param example: The example to evaluate the metric on. + :type example: dspy.Example + :param prediction: The prediction to evaluate the metric on. + :type prediction: dspy.Prediction + :param trace: Used for DSPy. + :type trace: Any :return: The result of the evaluation. A boolean for if the metric is correct. :rtype: bool - """ evaluation_mapping = { "rating": self._evaluate_rating_metric, @@ -280,10 +285,14 @@ def compare_metrics( """Compare the metrics of the base and optimized models. Returns true if the optimized model is better than the base model. - :param base_metrics: The metrics of the base model. :type base_metrics: Any - :param optimized_metrics: The metrics of the optimized model. :type - optimized_metrics: Any :param comparison_value: The value to compare. - + :param base_metrics: The metrics of the base model. + :type base_metrics: Any + :param optimized_metrics: The metrics of the optimized model. + :type optimized_metrics: Any + :param comparison_value: The value to compare. + :type comparison_value: str + :return: True if the optimized model is better than the base model. + :rtype: bool """ if comparison_value == "overall_score": return optimized_metrics["overall_score"] > base_metrics["overall_score"] diff --git a/graphdoc/graphdoc/prompts/single_prompt.py b/graphdoc/graphdoc/prompts/single_prompt.py index c01f480..20bafb4 100644 --- a/graphdoc/graphdoc/prompts/single_prompt.py +++ b/graphdoc/graphdoc/prompts/single_prompt.py @@ -70,10 +70,12 @@ def evaluate_metric( ) -> Any: """This is the metric used to evalaute the prompt. - :param example: The example to evaluate the metric on. :type example: - dspy.Example :param prediction: The prediction to evaluate the metric on. :type - prediction: dspy.Prediction :param trace: The trace to evaluate the metric on. 
- This is for DSPy. :type trace: Any + :param example: The example to evaluate the metric on. + :type example: dspy.Example + :param prediction: The prediction to evaluate the metric on. + :type prediction: dspy.Prediction + :param trace: The trace to evaluate the metric on. This is for DSPy. + :type trace: Any """ pass @@ -89,11 +91,14 @@ def format_metric( """This takes the results from the evaluate_evalset and does any necessary formatting, taking into account the metric type. - :param examples: The examples to evaluate the metric on. :type examples: - List[dspy.Example] :param overall_score: The overall score of the metric. :type - overall_score: float :param results: The results from the evaluate_evalset. - :type results: List :param scores: The scores from the evaluate_evalset. :type - scores: List + :param examples: The examples to evaluate the metric on. + :type examples: List[dspy.Example] + :param overall_score: The overall score of the metric. + :type overall_score: float + :param results: The results from the evaluate_evalset. + :type results: List + :param scores: The scores from the evaluate_evalset. + :type scores: List """ pass @@ -108,12 +113,15 @@ def compare_metrics( """Compare the metrics of the base and optimized models. Return true if the optimized model is better than the base model. - :param base_metrics: The metrics of the base model. :type base_metrics: Any - :param optimized_metrics: The metrics of the optimized model. :type - optimized_metrics: Any :param comparison_value: The value to compare the metrics - on. Determines which metric is used to compare the models. :type - comparison_value: str :return: True if the optimized model is better than the - base model, False otherwise. :rtype: bool + :param base_metrics: The metrics of the base model. + :type base_metrics: Any + :param optimized_metrics: The metrics of the optimized model. + :type optimized_metrics: Any + :param comparison_value: The value to compare the metrics on. 
+ Determines which metric is used to compare the models. + :type comparison_value: str + :return: True if the optimized model is better than the base model, False otherwise. + :rtype: bool """ pass @@ -127,13 +135,16 @@ def evaluate_evalset( ) -> Dict[str, Any]: """Take in a list of examples and evaluate the results. - :param examples: The examples to evaluate the results on. :type examples: - List[dspy.Example] :param num_threads: The number of threads to use for - evaluation. :type num_threads: int :param display_progress: Whether to display - the progress of the evaluation. :type display_progress: bool :param - display_table: Whether to display the table of the evaluation. :type - display_table: bool :return: A dictionary containing the overall score, results, - and scores. :rtype: Dict[str, Any] + :param examples: The examples to evaluate the results on. + :type examples: List[dspy.Example] + :param num_threads: The number of threads to use for evaluation. + :type num_threads: int + :param display_progress: Whether to display the progress of the evaluation. + :type display_progress: bool + :param display_table: Whether to display the table of the evaluation. + :type display_table: bool + :return: A dictionary containing the overall score, results, and scores. + :rtype: Dict[str, Any] """ evaluator = dspy.Evaluate( diff --git a/graphdoc/graphdoc/train/doc_generator_trainer.py b/graphdoc/graphdoc/train/doc_generator_trainer.py index 60c0369..2fd4ecd 100644 --- a/graphdoc/graphdoc/train/doc_generator_trainer.py +++ b/graphdoc/graphdoc/train/doc_generator_trainer.py @@ -35,16 +35,22 @@ def __init__( ): """Initialize the DocGeneratorTrainer. - :param prompt: The prompt to train. :type prompt: DocGeneratorPrompt :param - optimizer_type: The type of optimizer to use. :type optimizer_type: str :param - optimizer_kwargs: The keyword arguments for the optimizer. :type - optimizer_kwargs: Dict[str, Any] :param mlflow_model_name: The name of the model - in mlflow. 
:type mlflow_model_name: str :param mlflow_experiment_name: The name - of the experiment in mlflow. :type mlflow_experiment_name: str :param - mlflow_tracking_uri: The uri of the mlflow tracking server. :type - mlflow_tracking_uri: str :param trainset: The training set. :type trainset: - List[dspy.Example] :param evalset: The evaluation set. :type evalset: - List[dspy.Example] + :param prompt: The prompt to train. + :type prompt: DocGeneratorPrompt + :param optimizer_type: The type of optimizer to use. + :type optimizer_type: str + :param optimizer_kwargs: The keyword arguments for the optimizer. + :type optimizer_kwargs: Dict[str, Any] + :param mlflow_model_name: The name of the model in mlflow. + :type mlflow_model_name: str + :param mlflow_experiment_name: The name of the experiment in mlflow. + :type mlflow_experiment_name: str + :param mlflow_tracking_uri: The uri of the mlflow tracking server. + :type mlflow_tracking_uri: str + :param trainset: The training set. + :type trainset: List[dspy.Example] + :param evalset: The evaluation set. + :type evalset: List[dspy.Example] """ super().__init__( @@ -83,9 +89,10 @@ def evaluation_metrics( ) -> None: """Log evaluation metrics to mlflow. - :param base_evaluation: The evaluation metrics of the base model. :type - base_evaluation: Dict[str, Any] :param optimized_evaluation: The evaluation - metrics of the optimized model. :type optimized_evaluation: Dict[str, Any] + :param base_evaluation: The evaluation metrics of the base model. + :type base_evaluation: Dict[str, Any] + :param optimized_evaluation: The evaluation metrics of the optimized model. + :type optimized_evaluation: Dict[str, Any] """ base_evaluation_overall_score = self._calculate_average_score(base_evaluation) @@ -140,11 +147,12 @@ def evaluate_training( def train( self, load_model_args: Optional[Dict[str, Any]] = None, save_model: bool = True ): - """Train the document generator model. + """Train the model. 
If load_model_args is provided, load the model from MLFlow. - :param load_model_args: The arguments to load the model. :type load_model_args: - Optional[Dict[str, Any]] :param save_model: Whether to save the model. :type - save_model: bool :return: The trained model. :rtype: dspy.ChainOfThought + :param load_model_args: The arguments to load the model from mlflow. + :type load_model_args: Optional[Dict[str, Any]] + :param save_model: Whether to save the model to mlflow. + :type save_model: bool """ # if model args are provided, load the model from mlflow diff --git a/graphdoc/graphdoc/train/doc_quality_trainer.py b/graphdoc/graphdoc/train/doc_quality_trainer.py index 442c9fc..7826ace 100644 --- a/graphdoc/graphdoc/train/doc_quality_trainer.py +++ b/graphdoc/graphdoc/train/doc_quality_trainer.py @@ -36,14 +36,22 @@ def __init__( """Initialize the DocQualityTrainer. This is the base class for implementing a trainer for a DocQualityPrompt. - :param prompt: The prompt to train. :type prompt: DocQualityPrompt :param - optimizer_type: The type of optimizer to use. :type optimizer_type: str :param - optimizer_kwargs: The keyword arguments for the optimizer. :type - optimizer_kwargs: Dict[str, Any] :param mlflow_model_name: The name of the model - in mlflow. :type mlflow_model_name: str :param mlflow_experiment_name: The name - of the experiment in mlflow. :type mlflow_experiment_name: str :param - mlflow_tracking_uri: The uri of the mlflow tracking server. :type - mlflow_tracking_uri: str :param trainset: The training set. + :param prompt: The prompt to train. + :type prompt: DocQualityPrompt + :param optimizer_type: The type of optimizer to use. + :type optimizer_type: str + :param optimizer_kwargs: The keyword arguments for the optimizer. + :type optimizer_kwargs: Dict[str, Any] + :param mlflow_model_name: The name of the model in mlflow. + :type mlflow_model_name: str + :param mlflow_experiment_name: The name of the experiment in mlflow. 
+ :type mlflow_experiment_name: str + :param mlflow_tracking_uri: The uri of the mlflow tracking server. + :type mlflow_tracking_uri: str + :param trainset: The training set. + :type trainset: List[dspy.Example] + :param evalset: The evaluation set. + :type evalset: List[dspy.Example] """ super().__init__( @@ -64,9 +72,10 @@ def evaluation_metrics(self, base_evaluation, optimized_evaluation): """Log evaluation metrics to mlflow. We will log the overall scores and the per category scores. Per category scores will be logged as a csv file. - :param base_evaluation: The evaluation metrics of the base model. :type - base_evaluation: Any :param optimized_evaluation: The evaluation metrics of the - optimized model. :type optimized_evaluation: Any + :param base_evaluation: The evaluation metrics of the base model. + :type base_evaluation: Any + :param optimized_evaluation: The evaluation metrics of the optimized model. + :type optimized_evaluation: Any """ base_evaluation_overall_score = base_evaluation["overall_score"] diff --git a/graphdoc/graphdoc/train/single_prompt_trainer.py b/graphdoc/graphdoc/train/single_prompt_trainer.py index 2096400..78e69b8 100644 --- a/graphdoc/graphdoc/train/single_prompt_trainer.py +++ b/graphdoc/graphdoc/train/single_prompt_trainer.py @@ -33,15 +33,22 @@ def __init__( """Initialize the SinglePromptTrainer. This is the base class for implementing a trainer for a single prompt. - :param prompt: The prompt to train. :type prompt: SinglePrompt :param - optimizer_type: The type of optimizer to use. :type optimizer_type: str :param - optimizer_kwargs: The keyword arguments for the optimizer. :type - optimizer_kwargs: Dict[str, Any] :param mlflow_model_name: The name of the model - in mlflow. :type mlflow_model_name: str :param mlflow_experiment_name: The name - of the experiment in mlflow. :type mlflow_experiment_name: str :param - mlflow_tracking_uri: The uri of the mlflow tracking server. 
:type - mlflow_tracking_uri: str :param trainset: The training set. :type trainset: - List[dspy.Example] + :param prompt: The prompt to train. + :type prompt: SinglePrompt + :param optimizer_type: The type of optimizer to use. + :type optimizer_type: str + :param optimizer_kwargs: The keyword arguments for the optimizer. + :type optimizer_kwargs: Dict[str, Any] + :param mlflow_model_name: The name of the model in mlflow. + :type mlflow_model_name: str + :param mlflow_experiment_name: The name of the experiment in mlflow. + :type mlflow_experiment_name: str + :param mlflow_tracking_uri: The uri of the mlflow tracking server. + :type mlflow_tracking_uri: str + :param trainset: The training set. + :type trainset: List[dspy.Example] + :param evalset: The evaluation set. + :type evalset: List[dspy.Example] """ self.prompt = prompt @@ -73,14 +80,14 @@ def __init__( # Abstract Methods # #################### - # TODO: decide on a return type and implement better type checking for parameters @abstractmethod def evaluation_metrics(self, base_evaluation, optimized_evaluation): """Log evaluation metrics to mlflow. - :param base_evaluation: The evaluation metrics of the base model. :type - base_evaluation: Any :param optimized_evaluation: The evaluation metrics of the - optimized model. :type optimized_evaluation: Any + :param base_evaluation: The evaluation metrics of the base model. + :type base_evaluation: Any + :param optimized_evaluation: The evaluation metrics of the optimized model. + :type optimized_evaluation: Any """ pass diff --git a/graphdoc/run.sh b/graphdoc/run.sh index 112f52a..26c4f46 100755 --- a/graphdoc/run.sh +++ b/graphdoc/run.sh @@ -60,10 +60,16 @@ commit_command() { } # Documentation commands +docs_generate() { + echo "Generating RST files..." + cd docs && python generate_docs.py + echo "RST files generated successfully!" +} + docs() { echo "Building documentation..." 
cd docs && make clean html - echo "Documentation built in docs/build/html" + echo "Documentation built in docs/_build/html" } docs_init() { @@ -119,8 +125,8 @@ show_help() { echo " lint Lint the code" echo " test Run the tests" echo " commit Format, lint, and test the code" + echo " docs-generate Generate documentation RST files" echo " docs Build the documentation" - echo " docs-init Initialize the Sphinx documentation" # train commands echo " doc-quality-train Train a document quality model" @@ -148,8 +154,8 @@ else "lint") lint_command ;; "test") test_command ;; "commit") commit_command ;; + "docs-generate") docs_generate ;; "docs") docs ;; - "docs-init") docs_init ;; "doc-quality-train") doc_quality_train_command ;; "doc-generator-train") doc_generator_train_command ;; "doc-generator-eval") doc_generator_eval_command ;; diff --git a/run.sh b/run.sh index 8b61d35..17589ad 100755 --- a/run.sh +++ b/run.sh @@ -75,7 +75,7 @@ show_help() { # make commands echo " mlflow-setup Install mlflow-manager dependencies and run the services" echo " mlflow-teardown Teardown mlflow-manager services" - + # train commands echo " doc-quality-train Train a document quality model" echo " doc-generator-train Train a document generator model"