From 15e04d7b71300b363b640619d240b1684e352969 Mon Sep 17 00:00:00 2001 From: denver Date: Fri, 14 Mar 2025 16:57:13 -0500 Subject: [PATCH 1/5] feat(data): add load_yaml_config_redacted for logging config without env values --- graphdoc/graphdoc/data/__init__.py | 2 ++ graphdoc/graphdoc/data/helper.py | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/graphdoc/graphdoc/data/__init__.py b/graphdoc/graphdoc/data/__init__.py index f9323a8..3f651b7 100644 --- a/graphdoc/graphdoc/data/__init__.py +++ b/graphdoc/graphdoc/data/__init__.py @@ -11,6 +11,7 @@ check_directory_path, check_file_path, load_yaml_config, + load_yaml_config_redacted, setup_logging, ) from graphdoc.data.local import LocalDataHelper @@ -45,4 +46,5 @@ "SchemaRating", "SchemaType", "schema_objects_to_dataset", + "load_yaml_config_redacted", ] diff --git a/graphdoc/graphdoc/data/helper.py b/graphdoc/graphdoc/data/helper.py index 2c3887d..5a2d6f2 100644 --- a/graphdoc/graphdoc/data/helper.py +++ b/graphdoc/graphdoc/data/helper.py @@ -92,7 +92,31 @@ def load_yaml_config(file_path: Union[str, Path], use_env: bool = True) -> dict: ) with open(_file_path, "r") as file: return yaml.load(file, Loader=SafeLoader) + +def load_yaml_config_redacted(file_path: Union[str, Path], replace_value: str = "redacted") -> dict: + """Load a YAML configuration file with environment variables redacted. + :param file_path: The path to the YAML file. + :type file_path: Union[str, Path] + :param replace_value: The value to replace the environment variables with. + :type replace_value: str + :return: The YAML configuration with env vars replaced by "redacted". + :rtype: dict + :raises ValueError: If the path does not resolve to a valid file. 
+ """ + def _redacted_env_constructor(loader, node): + return replace_value + + SafeLoader.add_constructor("!env", _redacted_env_constructor) + + _file_path = Path(file_path).resolve() + if not _file_path.is_file(): + raise ValueError( + f"The provided path does not resolve to a valid file: {file_path}" + ) + + with open(_file_path, "r") as file: + return yaml.load(file, Loader=SafeLoader) def setup_logging( log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], From 01ea38496f3f6cce91d64732f84098060eaa29dd Mon Sep 17 00:00:00 2001 From: denver Date: Fri, 14 Mar 2025 17:37:28 -0500 Subject: [PATCH 2/5] feat(prompt): explicitly request that entities include descriptions --- graphdoc/graphdoc/prompts/schema_doc_generation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/graphdoc/graphdoc/prompts/schema_doc_generation.py b/graphdoc/graphdoc/prompts/schema_doc_generation.py index 6a2f89e..30e6c25 100644 --- a/graphdoc/graphdoc/prompts/schema_doc_generation.py +++ b/graphdoc/graphdoc/prompts/schema_doc_generation.py @@ -31,6 +31,7 @@ class DocGeneratorSignature(dspy.Signature): - Descriptions should be factual, straightforward, and avoid any speculative language. - Refrain from using the phrase "in the { table } table" within your descriptions. - Ensure that the documentation adheres to standard schema formatting without modifying the underlying schema structure. + - Make sure that the entities themselves are documented. ### Formatting: - Maintain consistency with the existing documentation style and structure. 
From 279ee91f1b3636108d7bf4565e69446cf8ad52eb Mon Sep 17 00:00:00 2001 From: denver Date: Fri, 14 Mar 2025 17:38:08 -0500 Subject: [PATCH 3/5] fix: correct fill_empty_descriptions to return proper ast representation --- graphdoc/graphdoc/modules/doc_generator_module.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/graphdoc/graphdoc/modules/doc_generator_module.py b/graphdoc/graphdoc/modules/doc_generator_module.py index 200aa13..6a73c28 100644 --- a/graphdoc/graphdoc/modules/doc_generator_module.py +++ b/graphdoc/graphdoc/modules/doc_generator_module.py @@ -222,10 +222,13 @@ def _predict(self, database_schema: str) -> dspy.Prediction: if self.fill_empty_descriptions: updated_ast = self.par.fill_empty_descriptions(database_ast) database_schema = print_ast(updated_ast) + else: + database_schema = print_ast(database_ast) # try to generate the schema try: prediction = self.prompt.infer(database_schema=database_schema) + log.info("Generated schema: " + str(prediction.documented_schema)) except Exception as e: log.warning("Error generating schema: " + str(e)) return dspy.Prediction(documented_schema=database_schema) @@ -367,7 +370,7 @@ def document_full_schema( updated_ast = self.par.fill_empty_descriptions(document_ast) return_schema = print_ast(updated_ast) else: - return_schema = database_schema + return_schema = print_ast(document_ast) status = "ERROR" if trace: From ef07d8743557c2e088d6050eb134163a2672232b Mon Sep 17 00:00:00 2001 From: denver Date: Fri, 14 Mar 2025 17:42:34 -0500 Subject: [PATCH 4/5] test: ensure .env is loaded into test through test_cconftest --- graphdoc/tests/test_confest.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/graphdoc/tests/test_confest.py b/graphdoc/tests/test_confest.py index 6c6148a..71e8876 100644 --- a/graphdoc/tests/test_confest.py +++ b/graphdoc/tests/test_confest.py @@ -1,8 +1,12 @@ # Copyright 2025-, Semiotic AI, Inc. 
# SPDX-License-Identifier: Apache-2.0 +# system packages import logging +# external packages +from dotenv import load_dotenv + # internal packages from graphdoc import ( DocGeneratorPrompt, @@ -17,13 +21,12 @@ OverwriteSchemaRating, ) -# system packages - -# external packages - # logging log = logging.getLogger(__name__) +# load the environment variables +load_dotenv("../.env") + class TestFixtures: def test_parser(self, par: Parser): @@ -44,10 +47,6 @@ def test_overwrite_ldh(self, overwrite_ldh: LocalDataHelper): == OverwriteSchemaCategoryRatingMapping.get_rating ) - # def test_gd(self, gd: GraphDoc): - # assert gd is not None - # assert isinstance(gd, GraphDoc) - def test_dqp(self, dqp): assert isinstance(dqp, DocQualityPrompt) assert dqp.prompt_type == "predict" From 129c7a89ce4ad9f4086324205b84bf58a114619f Mon Sep 17 00:00:00 2001 From: denver Date: Fri, 14 Mar 2025 17:43:07 -0500 Subject: [PATCH 5/5] style: update styling to comply with black and other checks --- graphdoc/docs/conf.py | 2 +- graphdoc/graphdoc/data/helper.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/graphdoc/docs/conf.py b/graphdoc/docs/conf.py index b13eab0..2cb8c62 100644 --- a/graphdoc/docs/conf.py +++ b/graphdoc/docs/conf.py @@ -79,4 +79,4 @@ def setup(app): # Create static directory if it doesn't exist to avoid the warning if not os.path.exists(os.path.join(os.path.dirname(__file__), "_static")): - os.makedirs(os.path.join(os.path.dirname(__file__), "_static")) \ No newline at end of file + os.makedirs(os.path.join(os.path.dirname(__file__), "_static")) diff --git a/graphdoc/graphdoc/data/helper.py b/graphdoc/graphdoc/data/helper.py index 437027e..e08f07c 100644 --- a/graphdoc/graphdoc/data/helper.py +++ b/graphdoc/graphdoc/data/helper.py @@ -95,8 +95,11 @@ def load_yaml_config(file_path: Union[str, Path], use_env: bool = True) -> dict: ) with open(_file_path, "r") as file: return yaml.load(file, Loader=SafeLoader) - -def 
load_yaml_config_redacted(file_path: Union[str, Path], replace_value: str = "redacted") -> dict: + + +def load_yaml_config_redacted( + file_path: Union[str, Path], replace_value: str = "redacted" +) -> dict: """Load a YAML configuration file with environment variables redacted. :param file_path: The path to the YAML file. @@ -106,7 +109,9 @@ def load_yaml_config_redacted(file_path: Union[str, Path], replace_value: str = :return: The YAML configuration with env vars replaced by "redacted". :rtype: dict :raises ValueError: If the path does not resolve to a valid file. + """ + def _redacted_env_constructor(loader, node): return replace_value @@ -117,10 +122,11 @@ def _redacted_env_constructor(loader, node): raise ValueError( f"The provided path does not resolve to a valid file: {file_path}" ) - + with open(_file_path, "r") as file: return yaml.load(file, Loader=SafeLoader) + def setup_logging( log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], ):