diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index fa83d2b8a..d4111b64a 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ -420,7 +420,11 @@ def _get_arff(self, format: str) -> dict: # noqa: A002 file_size = filepath.stat().st_size if file_size > MB_120: raise NotImplementedError( - f"File {filename} too big for {file_size}-bit system ({bits} bytes).", + f"File '{filename}' ({file_size / 1e6:.1f} MB) " + f"exceeds the maximum supported size of 120 MB. " + f"This limitation applies to {bits}-bit systems. " + f"Large dataset handling is currently not fully supported. " + f"Please consider using a smaller dataset." ) if format.lower() == "arff": @@ -780,7 +784,12 @@ def get_data( # noqa: C901 # All the assumptions below for the target are dependant on the number of targets being 1 n_targets = len(target_names) if n_targets > 1: - raise NotImplementedError(f"Number of targets {n_targets} not implemented.") + raise NotImplementedError( + f"Multi-target prediction is not yet supported. " + f"Found {n_targets} target columns: {target_names}. " + f"Currently, only single-target datasets are supported. " + f"Please select a single target column." + ) target_name = target_names[0] x = data.drop(columns=[target_name]) diff --git a/openml/runs/functions.py b/openml/runs/functions.py index 666b75c37..e0089e5f9 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -755,7 +755,12 @@ def _run_task_get_arffcontent_parallel_helper( # noqa: PLR0913 test_x = None test_y = None else: - raise NotImplementedError(task.task_type) + raise NotImplementedError( + f"Task type '{task.task_type}' is not supported. " + f"Only OpenMLSupervisedTask and OpenMLClusteringTask are currently implemented. 
" + f"Task details: task_id={getattr(task, 'task_id', 'unknown')}, " + f"task_class={task.__class__.__name__}" + ) config.logger.info( f"Going to run model {model!s} on " @@ -982,7 +987,13 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None): # type: ignore if "predictions" not in files and from_server is True: task = openml.tasks.get_task(task_id) if task.task_type_id == TaskType.SUBGROUP_DISCOVERY: - raise NotImplementedError("Subgroup discovery tasks are not yet supported.") + raise NotImplementedError( + f"Subgroup discovery tasks are not yet supported. " + f"Task ID: {task_id}. Please check the OpenML documentation " + f"for supported task types. " + f"Currently supported task types: Classification, Regression, " + f"Clustering, and Learning Curve." + ) # JvR: actually, I am not sure whether this error should be raised. # a run can consist without predictions. But for now let's keep it @@ -1282,7 +1293,12 @@ def format_prediction( # noqa: PLR0913 if isinstance(task, OpenMLRegressionTask): return [repeat, fold, index, prediction, truth] - raise NotImplementedError(f"Formatting for {type(task)} is not supported.") + raise NotImplementedError( + f"Formatting predictions for task type '{type(task).__name__}' is not supported. " + f"Supported task types: OpenMLClassificationTask, OpenMLRegressionTask, " + f"and OpenMLLearningCurveTask. " + f"Please ensure your task is one of these types." + ) def delete_run(run_id: int) -> bool: diff --git a/openml/runs/run.py b/openml/runs/run.py index 945264131..abb89a3ab 100644 --- a/openml/runs/run.py +++ b/openml/runs/run.py @@ -480,7 +480,12 @@ def _generate_arff_dict(self) -> OrderedDict[str, Any]: ] else: - raise NotImplementedError(f"Task type {task.task_type!s} is not yet supported.") + raise NotImplementedError( + f"Task type '{task.task_type}' is not yet supported. " + f"Supported task types: Classification, Regression, Clustering, Learning Curve. " + f"Task ID: {task.task_id}. 
" + f"Please check the OpenML documentation for supported task types." + ) return arff_dict diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index e9b879ae4..b646cf725 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -528,7 +528,12 @@ def _create_task_from_xml(xml: str) -> OpenMLTask: TaskType.LEARNING_CURVE: OpenMLLearningCurveTask, }.get(task_type) if cls is None: - raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.") + raise NotImplementedError( + f"Task type '{common_kwargs['task_type']}' is not supported. " + f"Supported task types: SUPERVISED_CLASSIFICATION, " + f"SUPERVISED_REGRESSION, CLUSTERING, LEARNING_CURVE. " + f"Please check the OpenML documentation for available task types." + ) return cls(**common_kwargs) # type: ignore @@ -584,7 +589,13 @@ def create_task( elif task_type == TaskType.SUPERVISED_REGRESSION: task_cls = OpenMLRegressionTask # type: ignore else: - raise NotImplementedError(f"Task type {task_type:d} not supported.") + raise NotImplementedError( + f"Task type ID {task_type:d} is not supported. " + f"Supported task type IDs: {TaskType.SUPERVISED_CLASSIFICATION.value}, " + f"{TaskType.SUPERVISED_REGRESSION.value}, " + f"{TaskType.CLUSTERING.value}, {TaskType.LEARNING_CURVE.value}. " + f"Please refer to the TaskType enum for valid task type identifiers." + ) return task_cls( task_type_id=task_type, diff --git a/openml/tasks/task.py b/openml/tasks/task.py index 395b52482..fb0c77eff 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -290,7 +290,12 @@ def get_X_and_y(self) -> tuple[pd.DataFrame, pd.Series | pd.DataFrame | None]: TaskType.SUPERVISED_REGRESSION, TaskType.LEARNING_CURVE, ): - raise NotImplementedError(self.task_type) + raise NotImplementedError( + f"Task type '{self.task_type}' is not implemented for get_X_and_y(). " + f"Supported types: SUPERVISED_CLASSIFICATION, SUPERVISED_REGRESSION, " + f"LEARNING_CURVE. 
"
+ f"Task ID: {getattr(self, 'task_id', 'unknown')}." + ) X, y, _, _ = dataset.get_data(target=self.target_name) return X, y @@ -382,7 +387,7 @@ def __init__( # noqa: PLR0913 self.cost_matrix = cost_matrix if cost_matrix is not None: - raise NotImplementedError("Costmatrix") + raise NotImplementedError("Costmatrix functionality is not yet implemented.") class OpenMLRegressionTask(OpenMLSupervisedTask):