From bf6e0001e00bff1a0933dd53588bbc45b312cc76 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Tue, 30 Dec 2025 02:07:17 +0530 Subject: [PATCH 1/6] changes made --- openml/_api_calls.py | 4 +- openml/config.py | 252 +++++++++++++++++++++---------------------- 2 files changed, 127 insertions(+), 129 deletions(-) diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 81296b3da..12567ac7a 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -71,7 +71,7 @@ def resolve_env_proxies(url: str) -> str | None: def _create_url_from_endpoint(endpoint: str) -> str: - url = config.server + url = config._config.server if not url.endswith("/"): url += "/" url += endpoint @@ -301,7 +301,7 @@ def _file_id_to_url(file_id: int, filename: str | None = None) -> str: Presents the URL how to download a given file id filename is optional """ - openml_url = config.server.split("/api/") + openml_url = config._config.server.split("/api/") url = openml_url[0] + f"/data/download/{file_id!s}" if filename is not None: url += "/" + filename diff --git a/openml/config.py b/openml/config.py index cf66a6346..98a48a1c6 100644 --- a/openml/config.py +++ b/openml/config.py @@ -1,6 +1,7 @@ """Store module level information like the API key, cache directory and the server""" # License: BSD 3-Clause +# ruff: noqa: PLW0603 from __future__ import annotations import configparser @@ -11,10 +12,11 @@ import shutil import warnings from contextlib import contextmanager +from dataclasses import dataclass, replace from io import StringIO from pathlib import Path -from typing import Any, Iterator, cast -from typing_extensions import Literal, TypedDict +from typing import Any, Iterator +from typing_extensions import Literal from urllib.parse import urlparse logger = logging.getLogger(__name__) @@ -27,19 +29,62 @@ _TEST_SERVER_NORMAL_USER_KEY = "normaluser" -class _Config(TypedDict): - apikey: str - server: str - cachedir: Path - avoid_duplicate_runs: bool - retry_policy: Literal["human", "robot"] - connection_n_retries: int - show_progress: bool +# Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards) +_user_path = Path("~").expanduser().absolute() + + +def _resolve_default_cache_dir() -> Path: + user_defined_cache_dir = os.environ.get(OPENML_CACHE_DIR_ENV_VAR) + if user_defined_cache_dir is not None: + return Path(user_defined_cache_dir) + + if platform.system().lower() != "linux": + return _user_path / ".openml" + + xdg_cache_home = os.environ.get("XDG_CACHE_HOME") + if xdg_cache_home is None: + return Path("~", ".cache", "openml") + + # This is the proper XDG_CACHE_HOME directory, but + # we unfortunately had a problem where we used XDG_CACHE_HOME/org, + # we check heuristically if this old directory still exists and issue + # a warning if it does. There's too much data to move to do this for the user. + + # The new cache directory exists + cache_dir = Path(xdg_cache_home) / "openml" + if cache_dir.exists(): + return cache_dir + + # The old cache directory *does not* exist + heuristic_dir_for_backwards_compat = Path(xdg_cache_home) / "org" / "openml" + if not heuristic_dir_for_backwards_compat.exists(): + return cache_dir + + root_dir_to_delete = Path(xdg_cache_home) / "org" + openml_logger.warning( + "An old cache directory was found at '%s'. This directory is no longer used by " + "OpenML-Python. To silence this warning you would need to delete the old cache " + "directory. The cached files will then be located in '%s'.", + root_dir_to_delete, + cache_dir, + ) + return Path(xdg_cache_home) + + +@dataclass(frozen=True) +class OpenMLConfig: + apikey: str = "" + server: str = "https://www.openml.org/api/v1/xml" + cachedir: Path = _resolve_default_cache_dir() # noqa: RUF009 + avoid_duplicate_runs: bool = False + retry_policy: Literal["human", "robot"] = "human" + connection_n_retries: int = 5 + show_progress: bool = False def _create_log_handlers(create_file_handler: bool = True) -> None: # noqa: FBT001, FBT002 """Creates but does not attach the log handlers.""" - global console_handler, file_handler # noqa: PLW0603 + global console_handler, file_handler, _root_cache_directory # noqa: PLW0602 if console_handler is not None or file_handler is not None: logger.debug("Requested to create log handlers, but they are already created.") return @@ -105,61 +150,22 @@ def set_file_log_level(file_output_level: int) -> None: _set_level_register_and_store(file_handler, file_output_level) -# Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards) -_user_path = Path("~").expanduser().absolute() - - -def _resolve_default_cache_dir() -> Path: - user_defined_cache_dir = os.environ.get(OPENML_CACHE_DIR_ENV_VAR) - if user_defined_cache_dir is not None: - return Path(user_defined_cache_dir) - - if platform.system().lower() != "linux": - return _user_path / ".openml" - - xdg_cache_home = os.environ.get("XDG_CACHE_HOME") - if xdg_cache_home is None: - return Path("~", ".cache", "openml") +_config: OpenMLConfig = OpenMLConfig() +_root_cache_directory: Path = _config.cachedir - # This is the proper XDG_CACHE_HOME directory, but - # we unfortunately had a problem where we used XDG_CACHE_HOME/org, - # we check heuristically if this old directory still exists and issue - # a warning if it does. There's too much data to move to do this for the user. - # The new cache directory exists - cache_dir = Path(xdg_cache_home) / "openml" - if cache_dir.exists(): - return cache_dir +def __getattr__(name: str) -> Any: + if hasattr(_config, name): + return getattr(_config, name) + raise AttributeError(f"module 'openml.config' has no attribute '{name}'") - # The old cache directory *does not* exist - heuristic_dir_for_backwards_compat = Path(xdg_cache_home) / "org" / "openml" - if not heuristic_dir_for_backwards_compat.exists(): - return cache_dir - root_dir_to_delete = Path(xdg_cache_home) / "org" - openml_logger.warning( - "An old cache directory was found at '%s'. This directory is no longer used by " - "OpenML-Python. To silence this warning you would need to delete the old cache " - "directory. The cached files will then be located in '%s'.", - root_dir_to_delete, - cache_dir, - ) - return Path(xdg_cache_home) - - -_defaults: _Config = { - "apikey": "", - "server": "https://www.openml.org/api/v1/xml", - "cachedir": _resolve_default_cache_dir(), - "avoid_duplicate_runs": False, - "retry_policy": "human", - "connection_n_retries": 5, - "show_progress": False, -} - -# Default values are actually added here in the _setup() function which is -# called at the end of this module -server = _defaults["server"] +def __setattr__(name: str, value: Any) -> None: # noqa: N807 + global _config + if hasattr(_config, name): + _config = replace(_config, **{name: value}) + else: + raise AttributeError(f"module 'openml.config' has no attribute '{name}'") def get_server_base_url() -> str: @@ -172,23 +178,12 @@ def get_server_base_url() -> str: ------- str """ - domain, path = server.split("/api", maxsplit=1) + domain, _ = _config.server.split("/api", maxsplit=1) return domain.replace("api", "www") -apikey: str = _defaults["apikey"] -show_progress: bool = _defaults["show_progress"] -# The current cache directory (without the server name) -_root_cache_directory: Path = Path(_defaults["cachedir"]) -avoid_duplicate_runs = _defaults["avoid_duplicate_runs"] - -retry_policy: Literal["human", "robot"] = _defaults["retry_policy"] -connection_n_retries: int = _defaults["connection_n_retries"] - - def set_retry_policy(value: Literal["human", "robot"], n_retries: int | None = None) -> None: - global retry_policy # noqa: PLW0603 - global connection_n_retries # noqa: PLW0603 + global _config default_retries_by_policy = {"human": 5, "robot": 50} if value not in default_retries_by_policy: @@ -202,8 +197,11 @@ def set_retry_policy(value: Literal["human", "robot"], n_retries: int | None = N if isinstance(n_retries, int) and n_retries < 1: raise ValueError(f"`n_retries` is '{n_retries}' but must be positive.") - retry_policy = value - connection_n_retries = default_retries_by_policy[value] if n_retries is None else n_retries + _config = replace( + _config, + retry_policy=value, + connection_n_retries=(default_retries_by_policy[value] if n_retries is None else n_retries), + ) class ConfigurationForExamples: @@ -222,24 +220,30 @@ def start_using_configuration_for_example(cls) -> None: To configuration as was before this call is stored, and can be recovered by using the `stop_use_example_configuration` method. """ - global server # noqa: PLW0603 - global apikey # noqa: PLW0603 + global _config - if cls._start_last_called and server == cls._test_server and apikey == cls._test_apikey: + if ( + cls._start_last_called + and _config.server == cls._test_server + and _config.apikey == cls._test_apikey + ): # Method is called more than once in a row without modifying the server or apikey. # We don't want to save the current test configuration as a last used configuration. return - cls._last_used_server = server - cls._last_used_key = apikey + cls._last_used_server = _config.server + cls._last_used_key = _config.apikey cls._start_last_called = True # Test server key for examples - server = cls._test_server - apikey = cls._test_apikey + _config = replace( + _config, + server=cls._test_server, + apikey=cls._test_apikey, + ) warnings.warn( - f"Switching to the test server {server} to not upload results to the live server. " - "Using the test server may result in reduced performance of the API!", + f"Switching to the test server {_config.server} to not upload results to " + "the live server. Using the test server may result in reduced performance of the API!", stacklevel=2, ) @@ -254,11 +258,9 @@ def stop_using_configuration_for_example(cls) -> None: "`start_use_example_configuration` must be called first.", ) - global server # noqa: PLW0603 - global apikey # noqa: PLW0603 + global _config + _config = replace(_config, server=cls._test_server, apikey=cls._test_apikey) - server = cast(str, cls._last_used_server) - apikey = cast(str, cls._last_used_key) cls._start_last_called = False @@ -327,7 +329,7 @@ def determine_config_file_path() -> Path: return config_dir / "config" -def _setup(config: _Config | None = None) -> None: +def _setup(config: dict[str, Any] | None = None) -> None: """Setup openml package. Called on first import. Reads the config file and sets up apikey, server, cache appropriately. @@ -336,11 +338,8 @@ def _setup(config: _Config | None = None) -> None: openml.config.server = SOMESERVER We could also make it a property but that's less clear. """ - global apikey # noqa: PLW0603 - global server # noqa: PLW0603 - global _root_cache_directory # noqa: PLW0603 - global avoid_duplicate_runs # noqa: PLW0603 - global show_progress # noqa: PLW0603 + global _config + global _root_cache_directory config_file = determine_config_file_path() config_dir = config_file.parent @@ -358,19 +357,24 @@ def _setup(config: _Config | None = None) -> None: if config is None: config = _parse_config(config_file) - avoid_duplicate_runs = config["avoid_duplicate_runs"] - apikey = config["apikey"] - server = config["server"] - show_progress = config["show_progress"] - n_retries = int(config["connection_n_retries"]) + _config = replace( + _config, + apikey=config["apikey"], + server=config["server"], + show_progress=config["show_progress"], + avoid_duplicate_runs=config["avoid_duplicate_runs"], + retry_policy=config["retry_policy"], + connection_n_retries=int(config["connection_n_retries"]), + ) - set_retry_policy(config["retry_policy"], n_retries) + set_retry_policy(config["retry_policy"], _config.connection_n_retries) user_defined_cache_dir = os.environ.get(OPENML_CACHE_DIR_ENV_VAR) if user_defined_cache_dir is not None: short_cache_dir = Path(user_defined_cache_dir) else: short_cache_dir = Path(config["cachedir"]) + _root_cache_directory = short_cache_dir.expanduser().resolve() try: @@ -389,29 +393,31 @@ def _setup(config: _Config | None = None) -> None: def set_field_in_config_file(field: str, value: Any) -> None: """Overwrites the `field` in the configuration file with the new `value`.""" - if field not in _defaults: - raise ValueError(f"Field '{field}' is not valid and must be one of '{_defaults.keys()}'.") + global _config + if not hasattr(_config, field): + raise ValueError( + f"Field '{field}' is not valid and must be one of '{_config.__dict__.keys()}'." + ) - # TODO(eddiebergman): This use of globals has gone too far - globals()[field] = value + _config = replace(_config, **{field: value}) config_file = determine_config_file_path() - config = _parse_config(config_file) + existing = _parse_config(config_file) with config_file.open("w") as fh: - for f in _defaults: + for f in _config.__dict__: # We can't blindly set all values based on globals() because when the user # sets it through config.FIELD it should not be stored to file. # There doesn't seem to be a way to avoid writing defaults to file with configparser, # because it is impossible to distinguish from an explicitly set value that matches # the default value, to one that was set to its default because it was omitted. - value = globals()[f] if f == field else config.get(f) # type: ignore - if value is not None: - fh.write(f"{f} = {value}\n") + v = value if f == field else existing.get(f) + if v is not None: + fh.write(f"{f} = {v}\n") -def _parse_config(config_file: str | Path) -> _Config: +def _parse_config(config_file: str | Path) -> dict[str, Any]: """Parse the config file, set up defaults.""" config_file = Path(config_file) - config = configparser.RawConfigParser(defaults=_defaults) # type: ignore + config = configparser.RawConfigParser(defaults=_config.__dict__) # type: ignore # The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file. # Cheat the ConfigParser module by adding a fake section header @@ -434,16 +440,8 @@ def _parse_config(config_file: str | Path) -> _Config: return configuration # type: ignore -def get_config_as_dict() -> _Config: - return { - "apikey": apikey, - "server": server, - "cachedir": _root_cache_directory, - "avoid_duplicate_runs": avoid_duplicate_runs, - "connection_n_retries": connection_n_retries, - "retry_policy": retry_policy, - "show_progress": show_progress, - } +def get_config_as_dict() -> dict[str, Any]: + return _config.__dict__.copy() # NOTE: For backwards compatibility, we keep the `str` @@ -467,7 +465,7 @@ def get_cache_directory() -> str: The current cache directory. """ - url_suffix = urlparse(server).netloc + url_suffix = urlparse(_config.server).netloc reversed_url_suffix = os.sep.join(url_suffix.split(".")[::-1]) # noqa: PTH118 return os.path.join(_root_cache_directory, reversed_url_suffix) # noqa: PTH118 @@ -491,7 +489,7 @@ def set_root_cache_directory(root_cache_directory: str | Path) -> None: -------- get_cache_directory """ - global _root_cache_directory # noqa: PLW0603 + global _root_cache_directory _root_cache_directory = Path(root_cache_directory) @@ -502,7 +500,7 @@ def set_root_cache_directory(root_cache_directory: str | Path) -> None: @contextmanager -def overwrite_config_context(config: dict[str, Any]) -> Iterator[_Config]: +def overwrite_config_context(config: dict[str, Any]) -> Iterator[dict[str, Any]]: """A context manager to temporarily override variables in the configuration.""" existing_config = get_config_as_dict() merged_config = {**existing_config, **config} @@ -515,10 +513,10 @@ def overwrite_config_context(config: dict[str, Any]) -> Iterator[_Config]: __all__ = [ "get_cache_directory", + "get_config_as_dict", "set_root_cache_directory", "start_using_configuration_for_example", "stop_using_configuration_for_example", - "get_config_as_dict", ] _setup() From 834782c105b5244095e20f17059c081b88634640 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Tue, 30 Dec 2025 12:31:52 +0530 Subject: [PATCH 2/6] bug fixing --- examples/Advanced/datasets_tutorial.py | 2 +- .../benchmark_with_optunahub.py | 4 +-- .../flow_id_tutorial.py | 2 +- openml/_api_calls.py | 12 ++++----- openml/cli.py | 2 +- openml/config.py | 16 +++++++----- openml/runs/functions.py | 2 +- openml/testing.py | 14 +++++----- tests/conftest.py | 16 ++++++------ tests/test_datasets/test_dataset_functions.py | 14 +++++----- tests/test_openml/test_config.py | 26 +++++++++---------- tests/test_utils/test_utils.py | 2 +- 12 files changed, 58 insertions(+), 54 deletions(-) diff --git a/examples/Advanced/datasets_tutorial.py b/examples/Advanced/datasets_tutorial.py index cc57686d0..3a4833206 100644 --- a/examples/Advanced/datasets_tutorial.py +++ b/examples/Advanced/datasets_tutorial.py @@ -139,7 +139,7 @@ # only for the dataset owner. Further, critical fields cannot be edited if the dataset has any # tasks associated with it. To edit critical fields of a dataset (without tasks) owned by you, # configure the API key: -# openml.config.apikey = 'FILL_IN_OPENML_API_KEY' +# openml.config._config.apikey = 'FILL_IN_OPENML_API_KEY' # This example here only shows a failure when trying to work on a dataset not owned by you: # %% diff --git a/examples/_external_or_deprecated/benchmark_with_optunahub.py b/examples/_external_or_deprecated/benchmark_with_optunahub.py index ece3e7c40..c8f5f7b0c 100644 --- a/examples/_external_or_deprecated/benchmark_with_optunahub.py +++ b/examples/_external_or_deprecated/benchmark_with_optunahub.py @@ -44,7 +44,7 @@ # account (you don't need one for anything else, just to upload your results), # go to your profile and select the API-KEY. # Or log in, and navigate to https://www.openml.org/auth/api-key -openml.config.apikey = "" +openml.config._config.apikey = "" ############################################################################ # Prepare for preprocessors and an OpenML task # ============================================ @@ -95,7 +95,7 @@ def objective(trial: optuna.Trial) -> Pipeline: run = openml.runs.run_model_on_task(pipe, task=task_id, avoid_duplicate_runs=False) logger.log(1, f"Model has been trained - {run}") - if openml.config.apikey != "": + if openml.config._config.apikey != "": try: run.publish() diff --git a/examples/_external_or_deprecated/flow_id_tutorial.py b/examples/_external_or_deprecated/flow_id_tutorial.py index e813655fc..c533cfd9f 100644 --- a/examples/_external_or_deprecated/flow_id_tutorial.py +++ b/examples/_external_or_deprecated/flow_id_tutorial.py @@ -16,7 +16,7 @@ # %% openml.config.start_using_configuration_for_example() -openml.config.server = "https://api.openml.org/api/v1/xml" +openml.config._configserver = "https://api.openml.org/api/v1/xml" # %% # Defining a classifier diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 12567ac7a..c3f6d285f 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -172,7 +172,7 @@ def _download_minio_file( bucket_name=bucket, object_name=object_name, file_path=str(destination), - progress=ProgressBar() if config.show_progress else None, + progress=ProgressBar() if config._config.show_progress else None, request_headers=_HEADERS, ) if destination.is_file() and destination.suffix == ".zip": @@ -317,7 +317,7 @@ def _read_url_files( and sending file_elements as files """ data = {} if data is None else data - data["api_key"] = config.apikey + data["api_key"] = config._config.apikey if file_elements is None: file_elements = {} # Using requests.post sets header 'Accept-encoding' automatically to @@ -337,8 +337,8 @@ def __read_url( md5_checksum: str | None = None, ) -> requests.Response: data = {} if data is None else data - if config.apikey: - data["api_key"] = config.apikey + if config._config.apikey: + data["api_key"] = config._config.apikey return _send_request( request_method=request_method, url=url, @@ -363,10 +363,10 @@ def _send_request( # noqa: C901, PLR0912 files: FILE_ELEMENTS_TYPE | None = None, md5_checksum: str | None = None, ) -> requests.Response: - n_retries = max(1, config.connection_n_retries) + n_retries = max(1, config._config.connection_n_retries) response: requests.Response | None = None - delay_method = _human_delay if config.retry_policy == "human" else _robot_delay + delay_method = _human_delay if config._config.retry_policy == "human" else _robot_delay # Error to raise in case of retrying too often. Will be set to the last observed exception. retry_raise_e: Exception | None = None diff --git a/openml/cli.py b/openml/cli.py index d0a46e498..fb39afe97 100644 --- a/openml/cli.py +++ b/openml/cli.py @@ -339,7 +339,7 @@ def main() -> None: "'https://openml.github.io/openml-python/main/usage.html#configuration'.", ) - configurable_fields = [f for f in config._defaults if f not in ["max_retries"]] + configurable_fields = [f for f in config.get_config_as_dict() if f not in ["max_retries"]] parser_configure.add_argument( "field", diff --git a/openml/config.py b/openml/config.py index 98a48a1c6..20825463e 100644 --- a/openml/config.py +++ b/openml/config.py @@ -15,7 +15,7 @@ from dataclasses import dataclass, replace from io import StringIO from pathlib import Path -from typing import Any, Iterator +from typing import Any, Iterator, cast from typing_extensions import Literal from urllib.parse import urlparse @@ -71,7 +71,7 @@ def _resolve_default_cache_dir() -> Path: return Path(xdg_cache_home) -@dataclass(frozen=True) +@dataclass class OpenMLConfig: apikey: str = "" server: str = "https://www.openml.org/api/v1/xml" @@ -259,8 +259,11 @@ def stop_using_configuration_for_example(cls) -> None: ) global _config - _config = replace(_config, server=cls._test_server, apikey=cls._test_apikey) - + _config = replace( + _config, + server=cast(str, cls._last_used_server), + apikey=cast(str, cls._last_used_key), + ) cls._start_last_called = False @@ -334,8 +337,8 @@ def _setup(config: dict[str, Any] | None = None) -> None: Reads the config file and sets up apikey, server, cache appropriately. key and server can be set by the user simply using - openml.config.apikey = THEIRKEY - openml.config.server = SOMESERVER + openml.config._config.apikey = THEIRKEY + openml.config._config.server = SOMESERVER We could also make it a property but that's less clear. """ global _config @@ -376,6 +379,7 @@ def _setup(config: dict[str, Any] | None = None) -> None: short_cache_dir = Path(config["cachedir"]) _root_cache_directory = short_cache_dir.expanduser().resolve() + _config = replace(_config, cachedir=_root_cache_directory) try: cache_exists = _root_cache_directory.exists() diff --git a/openml/runs/functions.py b/openml/runs/functions.py index 666b75c37..7fa560833 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -226,7 +226,7 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913 raise ValueError("flow_tags should be a list") if avoid_duplicate_runs is None: - avoid_duplicate_runs = openml.config.avoid_duplicate_runs + avoid_duplicate_runs = openml.config._config.avoid_duplicate_runs # TODO: At some point in the future do not allow for arguments in old order (changed 6-2018). # Flexibility currently still allowed due to code-snippet in OpenML100 paper (3-2019). diff --git a/openml/testing.py b/openml/testing.py index d1da16876..fbf7edf44 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -99,13 +99,13 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None: os.chdir(self.workdir) self.cached = True - openml.config.apikey = TestBase.user_key + openml.config._config.apikey = TestBase.user_key self.production_server = "https://www.openml.org/api/v1/xml" openml.config.set_root_cache_directory(str(self.workdir)) # Increase the number of retries to avoid spurious server failures - self.retry_policy = openml.config.retry_policy - self.connection_n_retries = openml.config.connection_n_retries + self.retry_policy = openml.config._config.retry_policy + self.connection_n_retries = openml.config._config.connection_n_retries openml.config.set_retry_policy("robot", n_retries=20) def use_production_server(self) -> None: @@ -114,8 +114,8 @@ def use_production_server(self) -> None: Please use this sparingly - it is better to use the test server. """ - openml.config.server = self.production_server - openml.config.apikey = "" + openml.config._config.server = self.production_server + openml.config._config.apikey = "" def tearDown(self) -> None: """Tear down the test""" @@ -127,8 +127,8 @@ def tearDown(self) -> None: # one of the files may still be used by another process raise e - openml.config.connection_n_retries = self.connection_n_retries - openml.config.retry_policy = self.retry_policy + openml.config._config.connection_n_retries = self.connection_n_retries + openml.config._config.retry_policy = self.retry_policy @classmethod def _mark_entity_for_removal( diff --git a/tests/conftest.py b/tests/conftest.py index bd974f3f3..ba7c65813 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -97,8 +97,8 @@ def delete_remote_files(tracker, flow_names) -> None: :param tracker: Dict :return: None """ - openml.config.server = TestBase.test_server - openml.config.apikey = TestBase.user_key + openml.config._config.server = TestBase.test_server + openml.config._config.apikey = TestBase.user_key # reordering to delete sub flows at the end of flows # sub-flows have shorter names, hence, sorting by descending order of flow name length @@ -263,8 +263,8 @@ def verify_cache_state(test_files_directory) -> Iterator[None]: @pytest.fixture(autouse=True, scope="session") def as_robot() -> Iterator[None]: - policy = openml.config.retry_policy - n_retries = openml.config.connection_n_retries + policy = openml.config._config.retry_policy + n_retries = openml.config._config.connection_n_retries openml.config.set_retry_policy("robot", n_retries=20) yield openml.config.set_retry_policy(policy, n_retries) @@ -273,12 +273,12 @@ def as_robot() -> Iterator[None]: @pytest.fixture(autouse=True) def with_server(request): if "production" in request.keywords: - openml.config.server = "https://www.openml.org/api/v1/xml" - openml.config.apikey = None + openml.config._config.server = "https://www.openml.org/api/v1/xml" + openml.config._config.apikey = None yield return - openml.config.server = "https://test.openml.org/api/v1/xml" - openml.config.apikey = TestBase.user_key + openml.config._config.server = "https://test.openml.org/api/v1/xml" + openml.config._config.apikey = TestBase.user_key yield diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 266a6f6f7..ab5a4d8b8 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -153,7 +153,7 @@ def test_check_datasets_active(self): openml.datasets.check_datasets_active, [79], ) - openml.config.server = self.test_server + openml.config._config.server = self.test_server def test_illegal_character_tag(self): dataset = openml.datasets.get_dataset(1) @@ -179,7 +179,7 @@ def test__name_to_id_with_deactivated(self): self.use_production_server() # /d/1 was deactivated assert openml.datasets.functions._name_to_id("anneal") == 2 - openml.config.server = self.test_server + openml.config._config.server = self.test_server @pytest.mark.production() def test__name_to_id_with_multiple_active(self): @@ -417,8 +417,8 @@ def test__getarff_md5_issue(self): "oml:md5_checksum": "abc", "oml:url": "https://www.openml.org/data/download/61", } - n = openml.config.connection_n_retries - openml.config.connection_n_retries = 1 + n = openml.config._config.connection_n_retries + openml.config._config.connection_n_retries = 1 self.assertRaisesRegex( OpenMLHashException, @@ -428,7 +428,7 @@ def test__getarff_md5_issue(self): description, ) - openml.config.connection_n_retries = n + openml.config._config.connection_n_retries = n def test__get_dataset_features(self): features_file = _get_dataset_features_file(self.workdir, 2) @@ -588,7 +588,7 @@ def test_data_status(self): # admin key for test server (only admins can activate datasets. # all users can deactivate their own datasets) - openml.config.apikey = TestBase.admin_key + openml.config._config.apikey = TestBase.admin_key openml.datasets.status_update(did, "active") self._assert_status_of_dataset(did=did, status="active") @@ -1507,7 +1507,7 @@ def test_list_datasets_with_high_size_parameter(self): datasets_b = openml.datasets.list_datasets(size=np.inf) # Reverting to test server - openml.config.server = self.test_server + openml.config._config.server = self.test_server assert len(datasets_a) == len(datasets_b) diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 7ef223504..3ff4bcb00 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -46,7 +46,7 @@ class TestConfig(openml.testing.TestBase): def test_non_writable_home(self, log_handler_mock, warnings_mock): with tempfile.TemporaryDirectory(dir=self.workdir) as td: os.chmod(td, 0o444) - _dd = copy(openml.config._defaults) + _dd = copy(openml.config.get_config_as_dict()) _dd["cachedir"] = Path(td) / "something-else" openml.config._setup(_dd) @@ -110,26 +110,26 @@ class TestConfigurationForExamples(openml.testing.TestBase): def test_switch_to_example_configuration(self): """Verifies the test configuration is loaded properly.""" # Below is the default test key which would be used anyway, but just for clarity: - openml.config.apikey = TestBase.admin_key - openml.config.server = self.production_server + openml.config._config.apikey = TestBase.admin_key + openml.config._config.server = self.production_server openml.config.start_using_configuration_for_example() - assert openml.config.apikey == TestBase.user_key - assert openml.config.server == self.test_server + assert openml.config._config.apikey == TestBase.user_key + assert openml.config._config.server == self.test_server @pytest.mark.production() def test_switch_from_example_configuration(self): """Verifies the previous configuration is loaded after stopping.""" # Below is the default test key which would be used anyway, but just for clarity: - openml.config.apikey = TestBase.user_key - openml.config.server = self.production_server + openml.config._config.apikey = TestBase.user_key + openml.config._config.server = self.production_server openml.config.start_using_configuration_for_example() openml.config.stop_using_configuration_for_example() - assert openml.config.apikey == TestBase.user_key - assert openml.config.server == self.production_server + assert openml.config._config.apikey == TestBase.user_key + assert openml.config._config.server == self.production_server def test_example_configuration_stop_before_start(self): """Verifies an error is raised if `stop_...` is called before `start_...`.""" @@ -146,15 +146,15 @@ def test_example_configuration_stop_before_start(self): @pytest.mark.production() def test_example_configuration_start_twice(self): """Checks that the original config can be returned to if `start..` is called twice.""" - openml.config.apikey = TestBase.user_key - openml.config.server = self.production_server + openml.config._config.apikey = TestBase.user_key + openml.config._config.server = self.production_server openml.config.start_using_configuration_for_example() openml.config.start_using_configuration_for_example() openml.config.stop_using_configuration_for_example() - assert openml.config.apikey == TestBase.user_key - assert openml.config.server == self.production_server + assert openml.config._config.apikey == TestBase.user_key + assert openml.config._config.server == self.production_server def test_configuration_file_not_overwritten_on_load(): diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index 35be84903..1c0b50fe5 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -44,7 +44,7 @@ def min_number_evaluations_on_test_server() -> int: def _mocked_perform_api_call(call, request_method): - url = openml.config.server + "/" + call + url = openml.config._config.server + "/" + call return openml._api_calls._download_text_file(url) From 38ae9beb47122c54df2122e113ac8a4727bb2eb7 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 31 Dec 2025 00:07:30 +0530 Subject: [PATCH 3/6] test failures fix --- examples/Basics/introduction_tutorial.py | 2 +- openml/config.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/Basics/introduction_tutorial.py b/examples/Basics/introduction_tutorial.py index c864772f5..648bc90ed 100644 --- a/examples/Basics/introduction_tutorial.py +++ b/examples/Basics/introduction_tutorial.py @@ -35,7 +35,7 @@ # %% import openml -openml.config.apikey = "YOURKEY" +openml.config._config.apikey = "YOURKEY" # %% [markdown] # ## Caching diff --git a/openml/config.py b/openml/config.py index 20825463e..f2020b8c6 100644 --- a/openml/config.py +++ b/openml/config.py @@ -261,8 +261,8 @@ def stop_using_configuration_for_example(cls) -> None: global _config _config = replace( _config, - server=cast(str, cls._last_used_server), - apikey=cast(str, cls._last_used_key), + server=cast("str", cls._last_used_server), + apikey=cast("str", cls._last_used_key), ) cls._start_last_called = False @@ -421,7 +421,7 @@ def set_field_in_config_file(field: str, value: Any) -> None: def _parse_config(config_file: str | Path) -> dict[str, Any]: """Parse the config file, set up defaults.""" config_file = Path(config_file) - config = configparser.RawConfigParser(defaults=_config.__dict__) # type: ignore + config = configparser.RawConfigParser(defaults=OpenMLConfig().__dict__) # type: ignore # The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file. # Cheat the ConfigParser module by adding a fake section header @@ -493,8 +493,9 @@ def set_root_cache_directory(root_cache_directory: str | Path) -> None: -------- get_cache_directory """ - global _root_cache_directory + global _root_cache_directory, _config _root_cache_directory = Path(root_cache_directory) + _config = replace(_config, cachedir=_root_cache_directory) start_using_configuration_for_example = ( From 93ab9c21ce0dcd307666f98766b924e5bc1c09ba Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 31 Dec 2025 00:13:37 +0530 Subject: [PATCH 4/6] Update flow_id_tutorial.py --- examples/_external_or_deprecated/flow_id_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/_external_or_deprecated/flow_id_tutorial.py b/examples/_external_or_deprecated/flow_id_tutorial.py index c533cfd9f..496102085 100644 --- a/examples/_external_or_deprecated/flow_id_tutorial.py +++ b/examples/_external_or_deprecated/flow_id_tutorial.py @@ -16,7 +16,7 @@ # %% openml.config.start_using_configuration_for_example() -openml.config._configserver = "https://api.openml.org/api/v1/xml" +openml.config._config.server = "https://api.openml.org/api/v1/xml" # %% # Defining a classifier From aa25dd69aa2a8b08f17a3bd2d411a1829fd6eccf Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 31 Dec 2025 00:24:36 +0530 Subject: [PATCH 5/6] _defaults bug fixing --- openml/cli.py | 6 +++++- tests/test_openml/test_config.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/openml/cli.py b/openml/cli.py index fb39afe97..c1363ea74 100644 --- a/openml/cli.py +++ b/openml/cli.py @@ -9,6 +9,8 @@ from typing import Callable from urllib.parse import urlparse +from attr import fields + from openml import config @@ -339,7 +341,9 @@ def main() -> None: "'https://openml.github.io/openml-python/main/usage.html#configuration'.", ) - configurable_fields = [f for f in config.get_config_as_dict() if f not in ["max_retries"]] + configurable_fields = [ + f.name for f in fields(config.OpenMLConfig) if f.name not in ["max_retries"] + ] parser_configure.add_argument( "field", diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 3ff4bcb00..104639460 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -46,7 +46,7 @@ class TestConfig(openml.testing.TestBase): def test_non_writable_home(self, log_handler_mock, warnings_mock): with tempfile.TemporaryDirectory(dir=self.workdir) as td: os.chmod(td, 0o444) - _dd = copy(openml.config.get_config_as_dict()) + _dd = copy(openml.config.OpenMLConfig().__dict__) _dd["cachedir"] = Path(td) / "something-else" openml.config._setup(_dd) From a98b6b1c7753dbf02d8d6a2dc552abff8e8c60bb Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Wed, 31 Dec 2025 19:10:58 +0530 Subject: [PATCH 6/6] removed __setattr__ given it is not supported --- openml/config.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/openml/config.py b/openml/config.py index f2020b8c6..ad8060e7d 100644 --- a/openml/config.py +++ b/openml/config.py @@ -160,14 +160,6 @@ def __getattr__(name: str) -> Any: raise AttributeError(f"module 'openml.config' has no attribute '{name}'") -def __setattr__(name: str, value: Any) -> None: # noqa: N807 - global _config - if hasattr(_config, name): - _config = replace(_config, **{name: value}) - else: - raise AttributeError(f"module 'openml.config' has no attribute '{name}'") - - def get_server_base_url() -> str: """Return the base URL of the currently configured server.