Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions openml/_api/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from openml._api.runtime.core import APIContext


def set_api_version(version: str, strict=False):
    """Switch the shared API context to another API version.

    Parameters
    ----------
    version : str
        Version identifier forwarded to ``api_context.set_version``.
    strict : bool, default=False
        Forwarded unchanged to ``api_context.set_version``.
    """
    api_context.set_version(
        version=version,
        strict=strict,
    )


# Shared module-level context; set_api_version() rebuilds its backend.
api_context = APIContext()
5 changes: 5 additions & 0 deletions openml/_api/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from __future__ import annotations

# Base URL of the v1 API.
API_V1_SERVER = "https://www.openml.org/api/v1/xml"
# Base URL of the v2 API.
# NOTE(review): this is a loopback address over plain HTTP -- presumably a
# development default; confirm before release.
API_V2_SERVER = "http://127.0.0.1:8001"
# NOTE(review): placeholder value; presumably to be replaced by real
# configuration -- confirm where the key is meant to come from.
API_KEY = "..."
1 change: 1 addition & 0 deletions openml/_api/http/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from openml._api.http.client import HTTPClient
23 changes: 23 additions & 0 deletions openml/_api/http/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from __future__ import annotations

import requests

from openml.__version__ import __version__


class HTTPClient:
    """Thin wrapper around ``requests`` bound to a single API base URL.

    Parameters
    ----------
    base_url : str
        Root URL that every request path is appended to.
    timeout : float or None, default=None
        Seconds to wait for the server, forwarded to ``requests``. ``None``
        keeps the previous behaviour of waiting without a limit.
    """

    def __init__(self, base_url: str, timeout: float | None = None):
        self.base_url = base_url
        self.timeout = timeout
        self.headers = {"user-agent": f"openml-python/{__version__}"}

    def _url(self, path: str) -> str:
        """Join ``path`` onto the base URL with exactly one separating slash.

        A trailing slash on the base URL or a leading slash on ``path`` would
        otherwise produce ``//`` in the middle of the request URL.
        """
        return f"{self.base_url.rstrip('/')}/{path.lstrip('/')}"

    def get(self, path, params=None):
        """Send a GET request to ``path`` and return the ``requests`` response."""
        return requests.get(
            self._url(path),
            params=params,
            headers=self.headers,
            timeout=self.timeout,
        )

    def post(self, path, data=None, files=None):
        """Send a POST request to ``path`` and return the ``requests`` response."""
        return requests.post(
            self._url(path),
            data=data,
            files=files,
            headers=self.headers,
            timeout=self.timeout,
        )

    def delete(self, path, params=None):
        """Send a DELETE request to ``path`` and return the ``requests`` response."""
        return requests.delete(
            self._url(path),
            params=params,
            headers=self.headers,
            timeout=self.timeout,
        )
Empty file added openml/_api/http/utils.py
Empty file.
2 changes: 2 additions & 0 deletions openml/_api/resources/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from openml._api.resources.datasets import DatasetsV1, DatasetsV2
from openml._api.resources.tasks import TasksV1, TasksV2
22 changes: 22 additions & 0 deletions openml/_api/resources/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from openml._api.http import HTTPClient


class ResourceAPI:
    """Base class for API resources; keeps the HTTP client they talk through."""

    def __init__(self, http: HTTPClient):
        # Stored privately; subclasses issue their requests via ``self._http``.
        self._http = http


class DatasetsAPI(ResourceAPI, ABC):
    """Abstract interface that every datasets resource implements."""

    @abstractmethod
    def get(self, id: int) -> dict:
        """Return the dataset identified by ``id``."""


class TasksAPI(ResourceAPI, ABC):
    """Abstract interface that every tasks resource implements."""

    @abstractmethod
    def get(self, id: int) -> dict:
        """Return the task identified by ``id``."""
13 changes: 13 additions & 0 deletions openml/_api/resources/datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from __future__ import annotations

from openml._api.resources.base import DatasetsAPI


class DatasetsV1(DatasetsAPI):
    """Datasets resource for the v1 API (implementation still pending)."""

    def get(self, id):
        """Fetch the dataset identified by ``id``.

        Raises
        ------
        NotImplementedError
            Always, until the v1 lookup is written. Raising makes the gap
            explicit instead of silently handing ``None`` to callers that
            expect a dataset description.
        """
        raise NotImplementedError("DatasetsV1.get is not implemented yet.")


class DatasetsV2(DatasetsAPI):
    """Datasets resource for the v2 API (implementation still pending)."""

    def get(self, id):
        """Fetch the dataset identified by ``id``.

        Raises
        ------
        NotImplementedError
            Always, until the v2 lookup is written. Raising makes the gap
            explicit instead of silently handing ``None`` to callers that
            expect a dataset description.
        """
        raise NotImplementedError("DatasetsV2.get is not implemented yet.")
113 changes: 113 additions & 0 deletions openml/_api/resources/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
from __future__ import annotations

import xmltodict

from openml._api.resources.base import TasksAPI
from openml.tasks.task import (
OpenMLClassificationTask,
OpenMLClusteringTask,
OpenMLLearningCurveTask,
OpenMLRegressionTask,
OpenMLTask,
TaskType,
)


class TasksV1(TasksAPI):
    """Tasks resource backed by the v1 XML API."""

    def get(self, id, return_response=False):
        """Fetch a task and parse it into an ``OpenMLTask``.

        Parameters
        ----------
        id : int
            Identifier of the task; interpolated into the ``task/{id}`` path.
        return_response : bool, default=False
            When true, also return the raw HTTP response so the caller can
            reuse its body.

        Returns
        -------
        OpenMLTask or tuple
            The parsed task, or ``(task, response)`` when ``return_response``
            is true.
        """
        response = self._http.get(f"task/{id}")
        # NOTE(review): the HTTP status is not inspected here, so an error
        # payload from the server surfaces as a parsing error below.
        task = self._create_task_from_xml(response.content)

        if return_response:
            return task, response
        return task

    def _create_task_from_xml(self, xml: str | bytes) -> OpenMLTask:
        """Create a task given a xml string.

        Parameters
        ----------
        xml : string or bytes
            Task xml representation.

        Returns
        -------
        OpenMLTask

        Raises
        ------
        NotImplementedError
            If the task type has no matching task class.
        """
        dic = xmltodict.parse(xml)["oml:task"]

        # xmltodict yields a list for repeated <oml:input> elements but a
        # single mapping when there is only one, so normalise before indexing
        # the inputs by their ``name`` attribute.
        raw_inputs = dic["oml:input"]
        if isinstance(raw_inputs, dict):
            raw_inputs = [raw_inputs]
        elif not isinstance(raw_inputs, list):
            raw_inputs = []
        inputs = {input_["@name"]: input_ for input_ in raw_inputs}

        evaluation_measures = None
        if "evaluation_measures" in inputs:
            evaluation_measures = inputs["evaluation_measures"]["oml:evaluation_measures"][
                "oml:evaluation_measure"
            ]

        task_type = TaskType(int(dic["oml:task_type_id"]))
        source_data = inputs["source_data"]["oml:data_set"]
        common_kwargs = {
            "task_id": dic["oml:task_id"],
            "task_type": dic["oml:task_type"],
            "task_type_id": task_type,
            "data_set_id": source_data["oml:data_set_id"],
            "evaluation_measure": evaluation_measures,
        }
        # TODO: add OpenMLClusteringTask?
        if task_type in (
            TaskType.SUPERVISED_CLASSIFICATION,
            TaskType.SUPERVISED_REGRESSION,
            TaskType.LEARNING_CURVE,
        ):
            procedure = inputs["estimation_procedure"]["oml:estimation_procedure"]

            # Same list-vs-mapping ambiguity as for the inputs: a procedure
            # with exactly one <oml:parameter> is parsed as a mapping, and
            # iterating that would walk its keys instead of the parameter.
            # A procedure without parameters simply has none to convert.
            parameters = procedure.get("oml:parameter", [])
            if isinstance(parameters, dict):
                parameters = [parameters]
            estimation_parameters = {
                parameter["@name"]: parameter.get("#text", "") for parameter in parameters
            }

            common_kwargs["estimation_procedure_type"] = procedure["oml:type"]
            common_kwargs["estimation_procedure_id"] = int(procedure["oml:id"])
            common_kwargs["estimation_parameters"] = estimation_parameters
            common_kwargs["target_name"] = source_data["oml:target_feature"]
            common_kwargs["data_splits_url"] = procedure["oml:data_splits_url"]

        cls = {
            TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask,
            TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask,
            TaskType.CLUSTERING: OpenMLClusteringTask,
            TaskType.LEARNING_CURVE: OpenMLLearningCurveTask,
        }.get(task_type)
        if cls is None:
            raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.")
        return cls(**common_kwargs)  # type: ignore


class TasksV2(TasksAPI):
    """Tasks resource for the v2 API (implementation still pending)."""

    def get(self, id, return_response=False):
        """Fetch the task identified by ``id``.

        Parameters
        ----------
        id : int
            Identifier of the task.
        return_response : bool, default=False
            Accepted for signature parity with ``TasksV1.get`` so callers can
            pass the same keyword regardless of the active API version.

        Raises
        ------
        NotImplementedError
            Always, until the v2 lookup is written. Raising makes the gap
            explicit instead of silently handing ``None`` to callers that
            expect a task.
        """
        raise NotImplementedError("TasksV2.get is not implemented yet.")
58 changes: 58 additions & 0 deletions openml/_api/runtime/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from __future__ import annotations

from openml._api.config import (
API_V1_SERVER,
API_V2_SERVER,
)
from openml._api.http.client import HTTPClient
from openml._api.resources import (
DatasetsV1,
DatasetsV2,
TasksV1,
TasksV2,
)
from openml._api.runtime.fallback import FallbackProxy


class APIBackend:
    """Bundle of the resource objects that make up one API backend."""

    def __init__(self, *, datasets, tasks):
        # Both arguments are keyword-only and stored as public attributes.
        self.datasets, self.tasks = datasets, tasks


def build_backend(version: str, strict: bool) -> APIBackend:
    """Assemble the resource backend for the requested API version.

    Parameters
    ----------
    version : str
        Either ``"v1"`` or ``"v2"``.
    strict : bool
        Only consulted for ``"v2"``. When true the v2 resources are returned
        as they are; otherwise each one is wrapped in a ``FallbackProxy``
        together with its v1 counterpart.

    Returns
    -------
    APIBackend

    Raises
    ------
    ValueError
        If ``version`` is neither ``"v1"`` nor ``"v2"``. Without this check
        any unrecognised string (a typo, different casing) would silently
        select the v2 backend.
    """
    if version not in ("v1", "v2"):
        raise ValueError(f"Unsupported API version {version!r}; expected 'v1' or 'v2'.")

    v2_only = version == "v2" and strict

    v1 = None
    if not v2_only:
        # The v1 resources are needed both for plain v1 and as the fallback.
        v1_http = HTTPClient(API_V1_SERVER)
        v1 = APIBackend(
            datasets=DatasetsV1(v1_http),
            tasks=TasksV1(v1_http),
        )
    if version == "v1":
        return v1

    v2_http = HTTPClient(API_V2_SERVER)
    v2 = APIBackend(
        datasets=DatasetsV2(v2_http),
        tasks=TasksV2(v2_http),
    )
    if strict:
        return v2

    return APIBackend(
        datasets=FallbackProxy(v2.datasets, v1.datasets),
        tasks=FallbackProxy(v2.tasks, v1.tasks),
    )


class APIContext:
    """Holder for the currently active API backend."""

    def __init__(self):
        # A fresh context starts on the non-strict v1 backend.
        self._backend = build_backend("v1", strict=False)

    def set_version(self, version: str, strict: bool = False):
        """Replace the active backend with one built for ``version``."""
        rebuilt = build_backend(version, strict)
        self._backend = rebuilt

    @property
    def backend(self):
        """The backend most recently built by this context."""
        return self._backend
5 changes: 5 additions & 0 deletions openml/_api/runtime/fallback.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from __future__ import annotations


class FallbackProxy:
    """Delegate attribute access to a list of APIs, tried in order.

    The first API that has the requested attribute serves it. When that
    attribute is callable and raises ``NotImplementedError``, the call is
    retried on the remaining APIs that also provide it.

    Parameters
    ----------
    *apis
        Resource objects in order of preference, e.g. ``(v2, v1)``.
    """

    def __init__(self, *apis):
        self._apis = apis

    def __getattr__(self, name):
        # Only reached for names not found on the proxy itself. Dunder
        # lookups are not forwarded so protocol probes (copy, pickle, ...)
        # see the proxy rather than one of the wrapped objects.
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)

        # Bypass __getattr__ so a half-initialised proxy cannot recurse.
        apis = object.__getattribute__(self, "_apis")
        candidates = [api for api in apis if hasattr(api, name)]
        if not candidates:
            raise AttributeError(f"{type(self).__name__!r} object has no attribute {name!r}")

        preferred = getattr(candidates[0], name)
        if not callable(preferred):
            return preferred

        def call(*args, **kwargs):
            last_error = None
            for api in candidates:
                try:
                    return getattr(api, name)(*args, **kwargs)
                except NotImplementedError as err:
                    # This API cannot serve the call yet; try the next one.
                    last_error = err
            raise NotImplementedError(
                f"{name!r} is not implemented by any configured API."
            ) from last_error

        return call
8 changes: 5 additions & 3 deletions openml/tasks/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import openml._api_calls
import openml.utils
from openml._api import api_context
from openml.datasets import get_dataset
from openml.exceptions import OpenMLCacheException

Expand Down Expand Up @@ -444,11 +445,12 @@ def _get_task_description(task_id: int) -> OpenMLTask:
except OpenMLCacheException:
_cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id)
xml_file = _cache_dir / "task.xml"
task_xml = openml._api_calls._perform_api_call("task/%d" % task_id, "get")
task, response = api_context.backend.tasks.get(task_id, return_response=True)

with xml_file.open("w", encoding="utf8") as fh:
fh.write(task_xml)
return _create_task_from_xml(task_xml)
fh.write(response.text)

return task


def _create_task_from_xml(xml: str) -> OpenMLTask:
Expand Down
Loading