Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions openml/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,11 @@ def _get_arff(self, format: str) -> dict: # noqa: A002
file_size = filepath.stat().st_size
if file_size > MB_120:
raise NotImplementedError(
f"File {filename} too big for {file_size}-bit system ({bits} bytes).",
f"File '{filename}' ({file_size / 1e6:.1f} MB) "
f"exceeds the maximum supported size of 120 MB. "
f"This limitation applies to {bits}-bit systems. "
f"Large dataset handling is currently not fully supported. "
f"Please consider using a smaller dataset."
)

if format.lower() == "arff":
Expand Down Expand Up @@ -780,7 +784,12 @@ def get_data( # noqa: C901
# All the assumptions below for the target are dependent on the number of targets being 1
n_targets = len(target_names)
if n_targets > 1:
raise NotImplementedError(f"Number of targets {n_targets} not implemented.")
raise NotImplementedError(
f"Multi-target prediction is not yet supported. "
f"Found {n_targets} target columns: {target_names}. "
f"Currently, only single-target datasets are supported. "
f"Please select a single target column."
)

target_name = target_names[0]
x = data.drop(columns=[target_name])
Expand Down
22 changes: 19 additions & 3 deletions openml/runs/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -755,7 +755,12 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913
test_x = None
test_y = None
else:
raise NotImplementedError(task.task_type)
raise NotImplementedError(
f"Task type '{task.task_type}' is not supported. "
f"Only OpenMLSupervisedTask and OpenMLClusteringTask are currently implemented. "
f"Task details: task_id={getattr(task, 'task_id', 'unknown')}, "
f"task_class={task.__class__.__name__}"
)

config.logger.info(
f"Going to run model {model!s} on "
Expand Down Expand Up @@ -982,7 +987,13 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None): # type: ignore
if "predictions" not in files and from_server is True:
task = openml.tasks.get_task(task_id)
if task.task_type_id == TaskType.SUBGROUP_DISCOVERY:
raise NotImplementedError("Subgroup discovery tasks are not yet supported.")
raise NotImplementedError(
f"Subgroup discovery tasks are not yet supported. "
f"Task ID: {task_id}. Please check the OpenML documentation "
f"for supported task types. "
f"Currently supported task types: Classification, Regression, "
f"Clustering, and Learning Curve."
)

# JvR: actually, I am not sure whether this error should be raised.
# a run can exist without predictions. But for now let's keep it
Expand Down Expand Up @@ -1282,7 +1293,12 @@ def format_prediction( # noqa: PLR0913
if isinstance(task, OpenMLRegressionTask):
return [repeat, fold, index, prediction, truth]

raise NotImplementedError(f"Formatting for {type(task)} is not supported.")
raise NotImplementedError(
f"Formatting predictions for task type '{type(task).__name__}' is not supported. "
f"Supported task types: OpenMLClassificationTask, OpenMLRegressionTask, "
f"and OpenMLLearningCurveTask. "
f"Please ensure your task is one of these types."
)


def delete_run(run_id: int) -> bool:
Expand Down
7 changes: 6 additions & 1 deletion openml/runs/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,12 @@ def _generate_arff_dict(self) -> OrderedDict[str, Any]:
]

else:
raise NotImplementedError(f"Task type {task.task_type!s} is not yet supported.")
raise NotImplementedError(
f"Task type '{task.task_type}' is not yet supported. "
f"Supported task types: Classification, Regression, Clustering, Learning Curve. "
f"Task ID: {task.task_id}. "
f"Please check the OpenML documentation for supported task types."
)

return arff_dict

Expand Down
15 changes: 13 additions & 2 deletions openml/tasks/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,12 @@ def _create_task_from_xml(xml: str) -> OpenMLTask:
TaskType.LEARNING_CURVE: OpenMLLearningCurveTask,
}.get(task_type)
if cls is None:
raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.")
raise NotImplementedError(
f"Task type '{common_kwargs['task_type']}' is not supported. "
f"Supported task types: SUPERVISED_CLASSIFICATION, "
f"SUPERVISED_REGRESSION, CLUSTERING, LEARNING_CURVE. "
f"Please check the OpenML documentation for available task types."
)
return cls(**common_kwargs) # type: ignore


Expand Down Expand Up @@ -584,7 +589,13 @@ def create_task(
elif task_type == TaskType.SUPERVISED_REGRESSION:
task_cls = OpenMLRegressionTask # type: ignore
else:
raise NotImplementedError(f"Task type {task_type:d} not supported.")
raise NotImplementedError(
f"Task type ID {task_type:d} is not supported. "
f"Supported task type IDs: {TaskType.SUPERVISED_CLASSIFICATION.value}, "
f"{TaskType.SUPERVISED_REGRESSION.value}, "
f"{TaskType.CLUSTERING.value}, {TaskType.LEARNING_CURVE.value}. "
f"Please refer to the TaskType enum for valid task type identifiers."
)

return task_cls(
task_type_id=task_type,
Expand Down
9 changes: 7 additions & 2 deletions openml/tasks/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,12 @@ def get_X_and_y(self) -> tuple[pd.DataFrame, pd.Series | pd.DataFrame | None]:
TaskType.SUPERVISED_REGRESSION,
TaskType.LEARNING_CURVE,
):
raise NotImplementedError(self.task_type)
raise NotImplementedError(
f"Task type '{self.task_type}' is not implemented for get_X_and_y(). "
f"Supported types: SUPERVISED_CLASSIFICATION, SUPERVISED_REGRESSION, "
f"LEARNING_CURVE. "
f"Task ID: {getattr(self, 'task_id', 'unknown')}."
)

X, y, _, _ = dataset.get_data(target=self.target_name)
return X, y
Expand Down Expand Up @@ -382,7 +387,7 @@ def __init__( # noqa: PLR0913
self.cost_matrix = cost_matrix

if cost_matrix is not None:
raise NotImplementedError("Costmatrix")
raise NotImplementedError("Costmatrix functionality is not yet implemented.")


class OpenMLRegressionTask(OpenMLSupervisedTask):
Expand Down
Loading