"""Dump plain-text pydoc output for every submodule of a package.

Usage::

    python docs/gen-pydoc.py [-o OUTPUT_FILE] [-p FILE_PREFIX] PACKAGE_NAME

The docs-publish workflow (.github/workflows/docs-publish.yml) runs this
script in a "Pydoc plaintext" step, placed before "Authenticate with Google",
to produce text documentation for LLM consumption::

    python docs/gen-pydoc.py -o build/html/dagshub-pydoc.txt -p build/html/dagshub-pydoc dagshub
    python docs/gen-pydoc.py -o build/html/dagshub-annotation-converter-pydoc.txt -p build/html/dagshub-annotation-converter-pydoc dagshub-annotation-converter
"""
import contextlib
import importlib
import pkgutil
import pydoc
import sys

import click


def _import_package(package_name):
    """Import ``package_name``, tolerating distribution-style hyphenated names.

    A name containing ``-`` is not a valid Python identifier, so a name such as
    ``dagshub-annotation-converter`` cannot normally be imported as written.
    If the import fails because that exact name is missing, retry once with
    hyphens replaced by underscores. Any other import failure is re-raised
    untouched.
    """
    try:
        return importlib.import_module(package_name)
    except ModuleNotFoundError as exc:
        normalized = package_name.replace("-", "_")
        # Only retry when the missing module is the requested one (not one of
        # its dependencies) and normalizing actually changes the name.
        if exc.name != package_name or normalized == package_name:
            raise
        return importlib.import_module(normalized)


def generate_docs(package_name, output_file=None, file_prefix=None):
    """Write pydoc text for each module found under ``package_name``.

    Modules are discovered with ``pkgutil.walk_packages`` over the package's
    ``__path__``; each one is rendered with ``pydoc.render_doc`` and stripped
    of terminal formatting with ``pydoc.plain``.

    Args:
        package_name: Importable name of the package to document.
        output_file: Path of the combined output file. When falsy, the
            combined output goes to ``sys.stdout``.
        file_prefix: When set, each successfully rendered module is also
            written to ``<file_prefix>_<module name with dots as underscores>.txt``.

    Raises:
        ImportError: If the package itself cannot be imported.
        AttributeError: If ``package_name`` resolves to a plain module
            without a ``__path__``.
    """
    package = _import_package(package_name)

    # The context manager guarantees the combined file is closed even when a
    # module blows up mid-run; stdout is wrapped so it is never closed.
    if output_file:
        out_cm = open(output_file, "w", encoding="utf-8")
    else:
        out_cm = contextlib.nullcontext(sys.stdout)

    with out_cm as out_f:
        out_f.write(f"\n# pydoc documentation for package: {package_name}\n")
        out_f.write("\n---\n")

        for _, module_name, _is_pkg in pkgutil.walk_packages(package.__path__, package.__name__ + "."):
            out_f.write(f"\n## pydoc of module: {module_name}\n")
            out_f.write("\n---\n")

            # Reset per module so a failed render can never reuse the text
            # produced for the previous module.
            docstring = None
            try:
                docstring = pydoc.plain(pydoc.render_doc(module_name))
                out_f.write(docstring)
            except Exception as e:
                # Best effort: record the failure and keep documenting the rest.
                out_f.write(f"Error documenting {module_name}: {e}\n")

            out_f.write("\n---\n")
            out_f.write(f"\nEnd of documentation for module: {module_name}\n")

            # Skip the per-module file when rendering failed; there is nothing
            # valid to put in it.
            if file_prefix and docstring is not None:
                module_filename = f"{file_prefix}_{module_name.replace('.', '_')}.txt"
                with open(module_filename, "w", encoding="utf-8") as module_file:
                    module_file.write(docstring)


@click.command()
@click.argument("package_name")
@click.option("--output-file", "-o", default=None, help="File to write full documentation output.")
@click.option("--file-prefix", "-p", default=None, help="Prefix for separate module documentation files.")
def main(package_name, output_file, file_prefix):
    """Command-line entry point: forward the parsed options to generate_docs."""
    generate_docs(package_name, output_file, file_prefix)


if __name__ == "__main__":
    main()