diff --git a/graphdoc/docs/conf.py b/graphdoc/docs/conf.py
index b13eab0..2cb8c62 100644
--- a/graphdoc/docs/conf.py
+++ b/graphdoc/docs/conf.py
@@ -79,4 +79,4 @@ def setup(app):
 
 # Create static directory if it doesn't exist to avoid the warning
 if not os.path.exists(os.path.join(os.path.dirname(__file__), "_static")):
-    os.makedirs(os.path.join(os.path.dirname(__file__), "_static"))
\ No newline at end of file
+    os.makedirs(os.path.join(os.path.dirname(__file__), "_static"))
diff --git a/graphdoc/graphdoc/data/__init__.py b/graphdoc/graphdoc/data/__init__.py
index f9323a8..3f651b7 100644
--- a/graphdoc/graphdoc/data/__init__.py
+++ b/graphdoc/graphdoc/data/__init__.py
@@ -11,6 +11,7 @@
     check_directory_path,
     check_file_path,
     load_yaml_config,
+    load_yaml_config_redacted,
     setup_logging,
 )
 from graphdoc.data.local import LocalDataHelper
@@ -45,4 +46,5 @@
     "SchemaRating",
     "SchemaType",
     "schema_objects_to_dataset",
+    "load_yaml_config_redacted",
 ]
diff --git a/graphdoc/graphdoc/data/helper.py b/graphdoc/graphdoc/data/helper.py
index 00f3a7b..e08f07c 100644
--- a/graphdoc/graphdoc/data/helper.py
+++ b/graphdoc/graphdoc/data/helper.py
@@ -97,6 +97,45 @@ def load_yaml_config(file_path: Union[str, Path], use_env: bool = True) -> dict:
         return yaml.load(file, Loader=SafeLoader)
 
 
+def load_yaml_config_redacted(
+    file_path: Union[str, Path], replace_value: str = "redacted"
+) -> dict:
+    """Load a YAML configuration file with environment variables redacted.
+
+    Every ``!env`` node is replaced by ``replace_value`` rather than resolved. The
+    override lives on a private ``SafeLoader`` subclass, so the shared ``SafeLoader``
+    used by ``load_yaml_config`` is not modified by calling this function.
+
+    :param file_path: The path to the YAML file.
+    :type file_path: Union[str, Path]
+    :param replace_value: The value to replace the environment variables with.
+    :type replace_value: str
+    :return: The YAML configuration with env vars replaced by ``replace_value``.
+    :rtype: dict
+    :raises ValueError: If the path does not resolve to a valid file.
+
+    """
+
+    _file_path = Path(file_path).resolve()
+    if not _file_path.is_file():
+        raise ValueError(
+            f"The provided path does not resolve to a valid file: {file_path}"
+        )
+
+    class _RedactedLoader(SafeLoader):
+        """Per-call loader that carries the ``!env`` redaction override."""
+
+    def _redacted_env_constructor(loader, node):
+        return replace_value
+
+    # Registering on the subclass (not on SafeLoader itself) keeps the redacting
+    # handler from leaking into later loads that use the shared SafeLoader.
+    _RedactedLoader.add_constructor("!env", _redacted_env_constructor)
+
+    with open(_file_path, "r") as file:
+        return yaml.load(file, Loader=_RedactedLoader)
+
+
 def setup_logging(
     log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
 ):
diff --git a/graphdoc/graphdoc/modules/doc_generator_module.py b/graphdoc/graphdoc/modules/doc_generator_module.py
index 200aa13..6a73c28 100644
--- a/graphdoc/graphdoc/modules/doc_generator_module.py
+++ b/graphdoc/graphdoc/modules/doc_generator_module.py
@@ -222,10 +222,13 @@ def _predict(self, database_schema: str) -> dspy.Prediction:
         if self.fill_empty_descriptions:
             updated_ast = self.par.fill_empty_descriptions(database_ast)
             database_schema = print_ast(updated_ast)
+        else:
+            database_schema = print_ast(database_ast)
 
         # try to generate the schema
         try:
             prediction = self.prompt.infer(database_schema=database_schema)
+            log.info("Generated schema: " + str(prediction.documented_schema))
         except Exception as e:
             log.warning("Error generating schema: " + str(e))
             return dspy.Prediction(documented_schema=database_schema)
@@ -367,7 +370,7 @@ def document_full_schema(
                 updated_ast = self.par.fill_empty_descriptions(document_ast)
                 return_schema = print_ast(updated_ast)
             else:
-                return_schema = database_schema
+                return_schema = print_ast(document_ast)
             status = "ERROR"
 
         if trace:
diff --git a/graphdoc/graphdoc/prompts/schema_doc_generation.py b/graphdoc/graphdoc/prompts/schema_doc_generation.py
index 6a2f89e..30e6c25 100644
--- a/graphdoc/graphdoc/prompts/schema_doc_generation.py
+++ b/graphdoc/graphdoc/prompts/schema_doc_generation.py
@@ -31,6 +31,7 @@ class DocGeneratorSignature(dspy.Signature):
     - Descriptions should be factual, straightforward, and avoid any speculative language.
     - Refrain from using the phrase "in the { table } table" within your descriptions.
     - Ensure that the documentation adheres to standard schema formatting without modifying the underlying schema structure.
+    - Make sure that the entities themselves are documented.
 
     ### Formatting:
     - Maintain consistency with the existing documentation style and structure.
diff --git a/graphdoc/tests/test_confest.py b/graphdoc/tests/test_confest.py
index 6c6148a..71e8876 100644
--- a/graphdoc/tests/test_confest.py
+++ b/graphdoc/tests/test_confest.py
@@ -1,8 +1,12 @@
 # Copyright 2025-, Semiotic AI, Inc.
 # SPDX-License-Identifier: Apache-2.0
 
+# system packages
 import logging
 
+# external packages
+from dotenv import load_dotenv
+
 # internal packages
 from graphdoc import (
     DocGeneratorPrompt,
@@ -17,13 +21,12 @@
     OverwriteSchemaRating,
 )
 
-# system packages
-
-# external packages
-
 # logging
 log = logging.getLogger(__name__)
 
+# load the environment variables
+load_dotenv("../.env")
+
 
 class TestFixtures:
     def test_parser(self, par: Parser):
@@ -44,10 +47,6 @@ def test_overwrite_ldh(self, overwrite_ldh: LocalDataHelper):
             == OverwriteSchemaCategoryRatingMapping.get_rating
         )
 
-    # def test_gd(self, gd: GraphDoc):
-    #     assert gd is not None
-    #     assert isinstance(gd, GraphDoc)
-
     def test_dqp(self, dqp):
         assert isinstance(dqp, DocQualityPrompt)
         assert dqp.prompt_type == "predict"