def check_baseline_style(
    config: Any,
    *,
    max_files_writing_attributes: int = 3,
) -> None:
  """Checks that a configuration is style-conformant.

  Currently a single rule is enforced: the arguments of the Buildables reachable
  from `config` must not have been written from too many distinct source files,
  as recorded in each Buildable's `__argument_history__`.

  Args:
    config: Configuration to check. It is traversed with `daglish.iterate`, and
      every `config_lib.Buildable` encountered is inspected.
    max_files_writing_attributes: Maximum number of distinct filenames that may
      appear in the inspected argument histories before the check fails.

  Raises:
    ValueError: If more than `max_files_writing_attributes` distinct filenames
      appear in the argument histories. The message lists each file together
      with one example path that was written from it.
  """
  # Maps filename --> one sample path written from that file. `setdefault`
  # keeps only the first path seen per file; it serves purely as an example in
  # the error message.
  sample_paths_by_file = {}

  for value, path in daglish.iterate(config):
    if not isinstance(value, config_lib.Buildable):
      continue
    for attr_name in config_lib.ordered_arguments(value):
      path_str = daglish.path_str((*path, daglish.Attr(attr_name)))
      for history_entry in value.__argument_history__[attr_name]:
        location: history.Location = history_entry.location
        sample_paths_by_file.setdefault(location.filename, path_str)

  if len(sample_paths_by_file) <= max_files_writing_attributes:
    return

  debug_str = ", ".join(
      f"{filename} (example: wrote to {path})"
      for filename, path in sample_paths_by_file.items()
  )
  raise ValueError(
      f"More than {max_files_writing_attributes} file(s) produced this"
      " config. For baseline configurations, please ensure that you don't"
      " have a big hierarchy of files each overriding values in a config,"
      " because this is harder to read for new users. The files which have"
      f" written to this config are: {debug_str}."
  )
class BaselineStyleTest(absltest.TestCase):
  """Exercises `baseline_style.check_baseline_style` on fake configs."""

  def test_check_baseline_style_okay(self):
    """Expects the plain encoder-decoder fixture to pass with a limit of 1."""
    fixture_config = fake_encoder_decoder.fixture.as_buildable()
    baseline_style.check_baseline_style(
        config=fixture_config, max_files_writing_attributes=1
    )

  def test_check_baseline_style_too_many_files(self):
    """Expects the fake experiment's config to fail with a limit of 1."""
    # The message must name both files along with a sample path for each.
    expected_message = (
        r"More than 1 file\(s\) produced this config.*files which have"
        r" written.*fake_encoder_decoder.*\.encoder.*"
        r"fake_experiment.*encoder\.attention\.dtype"
    )
    overridden_config = fake_experiment.fake_experiment()
    with self.assertRaisesRegex(ValueError, expected_message):
      baseline_style.check_baseline_style(
          config=overridden_config, max_files_writing_attributes=1
      )
def fake_experiment():
  """Returns the encoder-decoder fixture config with one value overridden.

  The override sets `encoder.attention.dtype` to "float64". It is written as a
  direct attribute assignment inside this function, so the write happens in
  this module.
  """
  experiment_config = fake_encoder_decoder.fixture.as_buildable()
  experiment_config.encoder.attention.dtype = "float64"
  return experiment_config