diff --git a/.github/workflows/cross-version-tests.yml b/.github/workflows/cross-version-tests.yml index c1dca09262dca..aeee9d7fb893e 100644 --- a/.github/workflows/cross-version-tests.yml +++ b/.github/workflows/cross-version-tests.yml @@ -13,6 +13,7 @@ jobs: runs-on: ubuntu-latest outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} + is_matrix_empty: ${{ steps.set-matrix.outputs.is_matrix_empty }} steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 @@ -20,7 +21,7 @@ jobs: python-version: "3.6" - name: Install dependencies run: | - pip install pyyaml pytest + pip install packaging pyyaml pytest - name: Test set_matrix.py run: | pytest dev/set_matrix.py --doctest-modules --verbose @@ -40,6 +41,7 @@ jobs: fi test: needs: set-matrix + if: ${{ needs.set-matrix.outputs.is_matrix_empty == 'false' }} runs-on: ubuntu-latest strategy: fail-fast: false diff --git a/dev/set_matrix.py b/dev/set_matrix.py index 974a7b2825526..1d437bf224063 100644 --- a/dev/set_matrix.py +++ b/dev/set_matrix.py @@ -34,7 +34,7 @@ """ import argparse -from distutils.version import LooseVersion +from packaging.version import Version import json import operator import os @@ -104,35 +104,6 @@ def get_released_versions(package_name): return versions -def get_major_version(ver): - """ - Examples - -------- - >>> get_major_version("1.2.3") - 1 - """ - return LooseVersion(ver).version[0] - - -def is_final_release(ver): - """ - Returns True if the given version matches PEP440's final release scheme. - - Examples - -------- - >>> is_final_release("0.1") - True - >>> is_final_release("0.23.0") - True - >>> is_final_release("0.4.0a1") - False - >>> is_final_release("0.5.0rc") - False - """ - # Ref.: https://www.python.org/dev/peps/pep-0440/#final-releases - return re.search(r"^\d+(\.\d+)+$", ver) is not None - - def select_latest_micro_versions(versions): """ Selects the latest micro version in each minor version. 
@@ -155,10 +126,10 @@ def select_latest_micro_versions(versions): for ver, _ in sorted( versions.items(), # Sort by (minor_version, upload_time) in descending order - key=lambda x: (LooseVersion(x[0]).version[:2], x[1]), + key=lambda x: (Version(x[0]).release[:2], x[1]), reverse=True, ): - minor_ver = tuple(LooseVersion(ver).version[:2]) # A set doesn't accept a list + minor_ver = Version(ver).release[:2] if minor_ver not in seen_minors: seen_minors.add(minor_ver) @@ -171,9 +142,10 @@ def filter_versions(versions, min_ver, max_ver, excludes=None): """ Filter versions that satisfy the following conditions: - 1. is newer than or equal to `min_ver` - 2. shares the same major version as `max_ver` or `min_ver` - 3. (Optional) is not in `excludes` + 1. is a final or post release that PEP 440 defines + 2. is newer than or equal to `min_ver` + 3. shares the same major version as `max_ver` or `min_ver` + 4. (Optional) is not in `excludes` Examples -------- @@ -198,12 +170,16 @@ def filter_versions(versions, min_ver, max_ver, excludes=None): assert max_ver in versions assert all(v in versions for v in excludes) - versions = {v: t for v, t in versions.items() if v not in excludes} - versions = {v: t for v, t in versions.items() if is_final_release(v)} + versions = {Version(v): t for v, t in versions.items() if v not in excludes} - max_major = get_major_version(max_ver) - versions = {v: t for v, t in versions.items() if get_major_version(v) <= max_major} - versions = {v: t for v, t in versions.items() if LooseVersion(v) >= LooseVersion(min_ver)} + def _is_final_or_post_release(v): + # final release: https://www.python.org/dev/peps/pep-0440/#final-releases + # post release: https://www.python.org/dev/peps/pep-0440/#post-releases + return (v.base_version == v.public) or (v.is_postrelease) + + versions = {v: t for v, t in versions.items() if _is_final_or_post_release(v)} + versions = {v: t for v, t in versions.items() if v.major <= Version(max_ver).major} + versions = {str(v): t 
for v, t in versions.items() if v >= Version(min_ver)} return versions @@ -324,8 +300,7 @@ def process_requirements(requirements, version=None): op_and_ver_pairs = map(get_operator_and_version, ver_spec.split(",")) match_all = all( comp_op( - LooseVersion(version), - LooseVersion(dev_numeric if req_ver == DEV_VERSION else req_ver), + Version(version), Version(dev_numeric if req_ver == DEV_VERSION else req_ver), ) for comp_op, req_ver in op_and_ver_pairs ) @@ -475,7 +450,9 @@ def main(): ) diff_flavor = set(filter(lambda x: x["flavor"] in changed_flavors, matrix)) - include = sorted(diff_config.union(diff_flavor), key=lambda x: x["job_name"]) + # If this file contains changes, re-run all the tests, otherwise re-run the affected tests. + include = matrix if (__file__ in changed_files) else diff_config.union(diff_flavor) + include = sorted(include, key=lambda x: x["job_name"]) job_names = [x["job_name"] for x in include] matrix = {"job_name": job_names, "include": include} @@ -488,6 +465,10 @@ def main(): # Note that this actually doesn't print anything to the console. print("::set-output name=matrix::{}".format(json.dumps(matrix))) + # Set a flag that indicates whether or not the matrix is empty. If this flag is 'true', + # skip the subsequent jobs. + print("::set-output name=is_matrix_empty::{}".format("false" if job_names else "true")) + if __name__ == "__main__": main() diff --git a/docs/source/models.rst b/docs/source/models.rst index 79bd9f2ea1c41..2021ad7d19f22 100644 --- a/docs/source/models.rst +++ b/docs/source/models.rst @@ -146,8 +146,22 @@ names, matching is done by position (i.e. MLflow will only check the number of c Column Type Enforcement """"""""""""""""""""""" The input column types are checked against the signature. MLflow will perform safe type conversions -if necessary. Generally, only upcasts (e.g. integer -> long or float -> double) are considered to be -safe. If the types cannot be made compatible, MLflow will raise an error. +if necessary. 
Generally, only conversions that are guaranteed to be lossless are allowed. For +example, int -> long or int -> double conversions are ok, long -> double is not. If the types cannot +be made compatible, MLflow will raise an error. + +Handling Integers With Missing Values +""""""""""""""""""""""""""""""""""""" +Integer data with missing values is typically represented as floats in Python. Therefore, data +types of integer columns in Python can vary depending on the data sample. This type variance can +cause schema enforcement errors at runtime since integer and float are not compatible types. For +example, if your training data did not have any missing values for integer column c, its type will +be integer. However, when you attempt to score a sample of the data that does include a missing +value in column c, its type will be float. If your model signature specified c to have integer type, +MLflow will raise an error since it can not convert float to int. Note that MLflow uses python to +serve models and to deploy models to Spark, so this can affect most model deployments. The best way +to avoid this problem is to declare integer columns as doubles (float64) whenever there can be +missing values. 
How To Log Models With Signatures ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/ml-package-versions.yml b/ml-package-versions.yml index c1910a8696e77..11a262166a5ca 100644 --- a/ml-package-versions.yml +++ b/ml-package-versions.yml @@ -152,3 +152,39 @@ lightgbm: requirements: ["scikit-learn", "matplotlib"] run: | pytest tests/lightgbm/test_lightgbm_autolog.py --large + +gluon: + package_info: + pip_release: "mxnet" + install_dev: | + pip install --pre mxnet -f https://dist.mxnet.io/python/cpu + + models: + minimum: "1.5.1" + maximum: "1.7.0.post1" + run: | + pytest tests/gluon/test_gluon_model_export.py --large + + autologging: + minimum: "1.5.1" + maximum: "1.7.0.post1" + run: | + pytest tests/gluon_autolog/test_gluon_autolog.py --large + +fastai-1.x: + package_info: + pip_release: "fastai" + + models: + minimum: "1.0.60" + maximum: "1.0.61" + requirements: ["scikit-learn"] + run: | + pytest tests/fastai/test_fastai_model_export.py --large + + autologging: + minimum: "1.0.60" + maximum: "1.0.61" + requirements: ["scikit-learn"] + run: | + pytest tests/fastai/test_fastai_autolog.py --large diff --git a/mlflow/_spark_autologging.py b/mlflow/_spark_autologging.py index 1ddba8643695a..4b672b44bce5e 100644 --- a/mlflow/_spark_autologging.py +++ b/mlflow/_spark_autologging.py @@ -15,7 +15,11 @@ from mlflow.tracking.client import MlflowClient from mlflow.tracking.context.abstract_context import RunContextProvider from mlflow.utils import gorilla -from mlflow.utils.autologging_utils import wrap_patch +from mlflow.utils.autologging_utils import ( + wrap_patch, + autologging_is_disabled, +) +from mlflow.spark import FLAVOR_NAME _JAVA_PACKAGE = "org.mlflow.spark.autologging" _SPARK_TABLE_INFO_TAG_NAME = "sparkDatasourceInfo" @@ -217,6 +221,8 @@ def _notify(self, path, version, data_format): Method called by Scala SparkListener to propagate datasource read events to the current Python process """ + if autologging_is_disabled(FLAVOR_NAME): + return # If there's an active run, 
simply set the tag on it # Note that there's a TOCTOU race condition here - active_run() here can actually throw # if the main thread happens to end the run & pop from the active run stack after we check @@ -248,6 +254,9 @@ def in_context(self): return True def tags(self): + # if autologging is disabled, then short circuit `tags()` and return empty dict. + if autologging_is_disabled(FLAVOR_NAME): + return {} with _lock: global _table_infos seen = set() diff --git a/mlflow/gluon.py b/mlflow/gluon.py index 3a2ee4fac83fa..11bcdfbf87d4c 100644 --- a/mlflow/gluon.py +++ b/mlflow/gluon.py @@ -1,3 +1,4 @@ +from distutils.version import LooseVersion import os import pandas as pd @@ -48,6 +49,7 @@ def load_model(model_uri, ctx): model = mlflow.gluon.load_model("runs:/" + gluon_random_data_run.info.run_id + "/model") model(nd.array(np.random.rand(1000, 1, 32))) """ + import mxnet from mxnet import gluon from mxnet import sym @@ -58,7 +60,10 @@ def load_model(model_uri, ctx): symbol = sym.load(model_arch_path) inputs = sym.var("data", dtype="float32") net = gluon.SymbolBlock(symbol, inputs) - net.collect_params().load(model_params_path, ctx) + if LooseVersion(mxnet.__version__) >= LooseVersion("2.0.0"): + net.load_parameters(model_params_path, ctx) + else: + net.collect_params().load(model_params_path, ctx) return net diff --git a/mlflow/keras.py b/mlflow/keras.py index 3b65969aa8452..9be7217f03889 100644 --- a/mlflow/keras.py +++ b/mlflow/keras.py @@ -24,14 +24,15 @@ from mlflow.models.signature import ModelSignature from mlflow.models.utils import ModelInputExample, _save_example from mlflow.tracking.artifact_utils import _download_artifact_from_uri -from mlflow.utils import gorilla from mlflow.utils.environment import _mlflow_conda_env from mlflow.utils.model_utils import _get_flavor_configuration from mlflow.utils.annotations import experimental from mlflow.utils.autologging_utils import ( + autologging_integration, + safe_patch, + ExceptionSafeClass, try_mlflow_log, 
log_fn_args_as_params, - wrap_patch, batch_metrics_logger, ) from mlflow.tracking._model_registry import DEFAULT_AWAIT_MAX_SLEEP_SECONDS @@ -561,10 +562,12 @@ def load_model(model_uri, **kwargs): @experimental -def autolog(log_models=True): +@autologging_integration(FLAVOR_NAME) +def autolog(log_models=True, disable=False): # pylint: disable=unused-argument # pylint: disable=E0611 """ - Enables automatic logging from Keras to MLflow. Autologging captures the following information: + Enables (or disables) and configures autologging from Keras to MLflow. Autologging captures + the following information: **Metrics** and **Parameters** - Training loss; validation loss; user-specified metrics @@ -611,11 +614,13 @@ def autolog(log_models=True): :param log_models: If ``True``, trained models are logged as MLflow model artifacts. If ``False``, trained models are not logged. + :param disable: If ``True``, disables all supported autologging integrations. If ``False``, + enables all supported autologging integrations. """ import keras def getKerasCallback(metrics_logger): - class __MLflowKerasCallback(keras.callbacks.Callback): + class __MLflowKerasCallback(keras.callbacks.Callback, metaclass=ExceptionSafeClass): """ Callback for auto-logging metrics and parameters. Records available logs after each epoch. 
@@ -691,17 +696,14 @@ def _early_stop_check(callbacks): def _log_early_stop_callback_params(callback): if callback: - try: - earlystopping_params = { - "monitor": callback.monitor, - "min_delta": callback.min_delta, - "patience": callback.patience, - "baseline": callback.baseline, - "restore_best_weights": callback.restore_best_weights, - } - try_mlflow_log(mlflow.log_params, earlystopping_params) - except Exception: # pylint: disable=W0703 - return + earlystopping_params = { + "monitor": callback.monitor, + "min_delta": callback.min_delta, + "patience": callback.patience, + "baseline": callback.baseline, + "restore_best_weights": callback.restore_best_weights, + } + try_mlflow_log(mlflow.log_params, earlystopping_params) def _get_early_stop_callback_attrs(callback): try: @@ -731,12 +733,6 @@ def _log_early_stop_callback_metrics(callback, history, metrics_logger): metrics_logger.record_metrics(restored_metrics, last_epoch) def _run_and_log_function(self, original, args, kwargs, unlogged_params, callback_arg_index): - if not mlflow.active_run(): - try_mlflow_log(mlflow.start_run) - auto_end_run = True - else: - auto_end_run = False - log_fn_args_as_params(original, args, kwargs, unlogged_params) early_stop_callback = None @@ -755,37 +751,34 @@ def _run_and_log_function(self, original, args, kwargs, unlogged_params, callbac else: kwargs["callbacks"] = [mlflowKerasCallback] - _log_early_stop_callback_params(early_stop_callback) + try_mlflow_log(_log_early_stop_callback_params, early_stop_callback) history = original(self, *args, **kwargs) - _log_early_stop_callback_metrics(early_stop_callback, history, metrics_logger) - - if auto_end_run: - try_mlflow_log(mlflow.end_run) + try_mlflow_log( + _log_early_stop_callback_metrics, early_stop_callback, history, metrics_logger + ) return history - def fit(self, *args, **kwargs): - original = gorilla.get_original_attribute(keras.Model, "fit") + def fit(original, self, *args, **kwargs): unlogged_params = ["self", "x", "y", 
"callbacks", "validation_data", "verbose"] return _run_and_log_function(self, original, args, kwargs, unlogged_params, 5) - def fit_generator(self, *args, **kwargs): + def fit_generator(original, self, *args, **kwargs): """ NOTE: `fit_generator()` is deprecated in Keras >= 2.4.0 and simply wraps `fit()`. To avoid unintentional creation of nested MLflow runs caused by a patched `fit_generator()` method calling a patched `fit()` method, we only patch `fit_generator()` in Keras < 2.4.0. """ - original = gorilla.get_original_attribute(keras.Model, "fit_generator") unlogged_params = ["self", "generator", "callbacks", "validation_data", "verbose"] return _run_and_log_function(self, original, args, kwargs, unlogged_params, 4) - wrap_patch(keras.Model, "fit", fit) + safe_patch(FLAVOR_NAME, keras.Model, "fit", fit, manage_run=True) # `fit_generator()` is deprecated in Keras >= 2.4.0 and simply wraps `fit()`. # To avoid unintentional creation of nested MLflow runs caused by a patched # `fit_generator()` method calling a patched `fit()` method, we only patch # `fit_generator()` in Keras < 2.4.0. if LooseVersion(keras.__version__) < LooseVersion("2.4.0"): - wrap_patch(keras.Model, "fit_generator", fit_generator) + safe_patch(FLAVOR_NAME, keras.Model, "fit_generator", fit_generator, manage_run=True) diff --git a/mlflow/pyfunc/__init__.py b/mlflow/pyfunc/__init__.py index d12ffb3ddabc8..03d5528e59da4 100644 --- a/mlflow/pyfunc/__init__.py +++ b/mlflow/pyfunc/__init__.py @@ -290,6 +290,7 @@ def _enforce_type(name, values: pandas.Series, t: DataType): 1. np.object -> string 2. int -> long (upcast) 3. float -> double (upcast) + 4. int -> double (safe conversion) Any other type mismatch will raise error. 
""" @@ -310,7 +311,10 @@ def _enforce_type(name, values: pandas.Series, t: DataType): "Failed to convert column {0} from type {1} to {2}.".format(name, values.dtype, t) ) - if values.dtype in (t.to_pandas(), t.to_numpy()): + # NB: Comparison of pandas and numpy data type fails when numpy data type is on the left hand + # side of the comparison operator. It works, however, if pandas type is on the left hand side. + # That is because pandas is aware of numpy. + if t.to_pandas() == values.dtype or t.to_numpy() == values.dtype: # The types are already compatible => conversion is not necessary. return values @@ -321,17 +325,46 @@ def _enforce_type(name, values: pandas.Series, t: DataType): return values numpy_type = t.to_numpy() - is_compatible_type = values.dtype.kind == numpy_type.kind - is_upcast = values.dtype.itemsize <= numpy_type.itemsize - if is_compatible_type and is_upcast: + if values.dtype.kind == numpy_type.kind: + is_upcast = values.dtype.itemsize <= numpy_type.itemsize + elif values.dtype.kind == "u" and numpy_type.kind == "i": + is_upcast = values.dtype.itemsize < numpy_type.itemsize + elif values.dtype.kind in ("i", "u") and numpy_type == np.float64: + # allow (u)int => double conversion + is_upcast = values.dtype.itemsize <= 6 + else: + is_upcast = False + + if is_upcast: return values.astype(numpy_type, errors="raise") else: # NB: conversion between incompatible types (e.g. floats -> ints or # double -> float) are not allowed. While supported by pandas and numpy, # these conversions alter the values significantly. + def all_ints(xs): + return all([pandas.isnull(x) or int(x) == x for x in xs]) + + hint = "" + if ( + values.dtype == np.float64 + and numpy_type.kind in ("i", "u") + and values.hasnans + and all_ints(values) + ): + hint = ( + " Hint: the type mismatch is likely caused by missing values. " + "Integer columns in python can not represent missing values and are therefore " + "encoded as floats. 
The best way to avoid this problem is to infer the model " + "schema based on a realistic data sample (training dataset) that includes missing " + "values. Alternatively, you can declare integer columns as doubles (float64) " + "whenever these columns may have missing values. See `Handling Integers With " + "Missing Values `_ for more details." + ) + raise MlflowException( "Incompatible input types for column {0}. " - "Can not safely convert {1} to {2}.".format(name, values.dtype, numpy_type) + "Can not safely convert {1} to {2}.{3}".format(name, values.dtype, numpy_type, hint) ) @@ -399,7 +432,7 @@ class PyFuncModel(object): ``model_impl`` can be any Python object that implements the `Pyfunc interface `_, and is - by invoking the model's ``loader_module``. + returned by invoking the model's ``loader_module``. ``model_meta`` contains model metadata loaded from the MLmodel file. """ @@ -415,10 +448,16 @@ def __init__(self, model_meta: Model, model_impl: Any): def predict(self, data: pandas.DataFrame) -> PyFuncOutput: """ Generate model predictions. + + If the model contains signature, enforce the input schema first before calling the model + implementation with the sanitized input. If the pyfunc model does not include model schema, + the input is passed to the model implementation as is. See `Model Signature Enforcement + `_ for more details." + :param data: Model input as pandas.DataFrame. :return: Model predictions as one of pandas.DataFrame, pandas.Series, numpy.ndarray or list. 
""" - input_schema = self._model_meta.get_input_schema() + input_schema = self.metadata.get_input_schema() if input_schema is not None: data = _enforce_schema(data, input_schema) return self._model_impl.predict(data) diff --git a/mlflow/sklearn/__init__.py b/mlflow/sklearn/__init__.py index 5a8e093c4bd65..15d66f3f3f45d 100644 --- a/mlflow/sklearn/__init__.py +++ b/mlflow/sklearn/__init__.py @@ -18,7 +18,6 @@ import mlflow from mlflow import pyfunc -from mlflow.entities.run_status import RunStatus from mlflow.exceptions import MlflowException from mlflow.models import Model from mlflow.models.model import MLMODEL_FILE_NAME @@ -27,13 +26,13 @@ from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE, INTERNAL_ERROR from mlflow.protos.databricks_pb2 import RESOURCE_ALREADY_EXISTS from mlflow.tracking.artifact_utils import _download_artifact_from_uri -from mlflow.utils import gorilla from mlflow.utils.annotations import experimental from mlflow.utils.environment import _mlflow_conda_env from mlflow.utils.model_utils import _get_flavor_configuration from mlflow.utils.autologging_utils import ( + autologging_integration, + safe_patch, try_mlflow_log, - wrap_patch, INPUT_EXAMPLE_SAMPLE_ROWS, resolve_input_example_and_signature, ) @@ -532,9 +531,12 @@ def should_log(self): @experimental -def autolog(log_input_examples=False, log_model_signatures=True, log_models=True): +@autologging_integration(FLAVOR_NAME) +def autolog( + log_input_examples=False, log_model_signatures=True, log_models=True, disable=False +): # pylint: disable=unused-argument """ - Enables autologging for scikit-learn estimators. + Enables (or disables) and configures autologging for scikit-learn estimators. **When is autologging performed?** Autologging is performed when you call: @@ -712,6 +714,8 @@ def fetch_logged_data(run_id): If ``False``, trained models are not logged. 
Input examples and model signatures, which are attributes of MLflow models, are also omitted when ``log_models`` is ``False``. + :param disable: If ``True``, disables all supported autologging integrations. If ``False``, + enables all supported autologging integrations. """ import pandas as pd import sklearn @@ -749,32 +753,15 @@ def fetch_logged_data(run_id): stacklevel=2, ) - def fit_mlflow(self, clazz, func_name, *args, **kwargs): + def fit_mlflow(original, self, *args, **kwargs): """ Autologging function that performs model training by executing the training method referred to be `func_name` on the instance of `clazz` referred to by `self` & records MLflow parameters, metrics, tags, and artifacts to a corresponding MLflow Run. """ - should_start_run = mlflow.active_run() is None - if should_start_run: - try_mlflow_log(mlflow.start_run) - _log_pretraining_metadata(self, *args, **kwargs) - - original_fit = gorilla.get_original_attribute(clazz, func_name) - try: - fit_output = original_fit(self, *args, **kwargs) - except Exception as e: - if should_start_run: - try_mlflow_log(mlflow.end_run, RunStatus.to_string(RunStatus.FAILED)) - - raise e - + fit_output = original(self, *args, **kwargs) _log_posttraining_metadata(self, *args, **kwargs) - - if should_start_run: - try_mlflow_log(mlflow.end_run) - return fit_output def _log_pretraining_metadata(estimator, *args, **kwargs): # pylint: disable=unused-argument @@ -923,7 +910,7 @@ def infer_model_signature(input_example): ) _logger.warning(msg) - def patched_fit(self, clazz, func_name, *args, **kwargs): + def patched_fit(original, self, *args, **kwargs): """ Autologging patch function to be applied to a sklearn model class that defines a `fit` method and inherits from `BaseEstimator` (thereby defining the `get_params()` method) @@ -934,18 +921,11 @@ def patched_fit(self, clazz, func_name, *args, **kwargs): for autologging (e.g., specify "fit" in order to indicate that `sklearn.linear_model.LogisticRegression.fit()` is 
being patched) """ - with _SklearnTrainingSession(clazz=clazz, allow_children=False) as t: + with _SklearnTrainingSession(clazz=self.__class__, allow_children=False) as t: if t.should_log(): - return fit_mlflow(self, clazz, func_name, *args, **kwargs) + return fit_mlflow(original, self, *args, **kwargs) else: - original_fit = gorilla.get_original_attribute(clazz, func_name) - return original_fit(self, *args, **kwargs) - - def create_patch_func(clazz, func_name): - def f(self, *args, **kwargs): - return patched_fit(self, clazz, func_name, *args, **kwargs) - - return f + return original(self, *args, **kwargs) _, estimators_to_patch = zip(*_all_estimators()) # Ensure that relevant meta estimators (e.g. GridSearchCV, Pipeline) are selected @@ -998,5 +978,6 @@ def f(self, *args, **kwargs): if isinstance(original, property): continue - patch_func = create_patch_func(class_def, func_name) - wrap_patch(class_def, func_name, patch_func) + safe_patch( + FLAVOR_NAME, class_def, func_name, patched_fit, manage_run=True, + ) diff --git a/mlflow/spark.py b/mlflow/spark.py index 6d1341d98f5d8..d7331c014a363 100644 --- a/mlflow/spark.py +++ b/mlflow/spark.py @@ -41,6 +41,7 @@ from mlflow.utils.model_utils import _get_flavor_configuration_from_uri from mlflow.utils.annotations import experimental from mlflow.tracking._model_registry import DEFAULT_AWAIT_MAX_SLEEP_SECONDS +from mlflow.utils.autologging_utils import autologging_integration FLAVOR_NAME = "spark" @@ -627,9 +628,10 @@ def predict(self, pandas_df): @experimental -def autolog(): +@autologging_integration(FLAVOR_NAME) +def autolog(disable=False): # pylint: disable=unused-argument """ - Enables automatic logging of Spark datasource paths, versions (if applicable), and formats + Enables (or disables) and configures logging of Spark datasource paths, versions (if applicable), and formats when they are read. 
This method is not threadsafe and assumes a `SparkSession `_ @@ -682,6 +684,9 @@ def autolog(): # next-created MLflow run if no run is currently active with mlflow.start_run() as active_run: pandas_df = loaded_df.toPandas() + + :param disable: If ``True``, disables all supported autologging integrations. + If ``False``, enables all supported autologging integrations. """ from mlflow import _spark_autologging diff --git a/mlflow/statsmodels.py b/mlflow/statsmodels.py index 44ee62cde4074..04444c7d5aed7 100644 --- a/mlflow/statsmodels.py +++ b/mlflow/statsmodels.py @@ -30,6 +30,7 @@ from mlflow.exceptions import MlflowException from mlflow.utils.annotations import experimental from mlflow.utils.autologging_utils import try_mlflow_log, log_fn_args_as_params +from mlflow.utils.validation import _is_numeric import itertools import inspect @@ -464,7 +465,7 @@ def results_to_dict(results): renamed_keys_dict = prepend_to_keys(d, f) results_dict.update(renamed_keys_dict) - elif isinstance(field, (int, float)): + elif _is_numeric(field): results_dict[f] = field except AttributeError: diff --git a/mlflow/tracking/fluent.py b/mlflow/tracking/fluent.py index e652ba0868056..e5a729d76f337 100644 --- a/mlflow/tracking/fluent.py +++ b/mlflow/tracking/fluent.py @@ -1212,10 +1212,10 @@ def _get_experiment_id(): def autolog( - log_input_examples=False, log_model_signatures=True, log_models=True + log_input_examples=False, log_model_signatures=True, log_models=True, disable=False ): # pylint: disable=unused-argument """ - Enable autologging for all supported integrations. + Enables (or disables) and configures autologging for all supported integrations. The parameters are passed to any autologging integrations that support them. @@ -1237,6 +1237,8 @@ def autolog( If ``False``, trained models are not logged. Input examples and model signatures, which are attributes of MLflow models, are also omitted when ``log_models`` is ``False``. 
+ :param disable: If ``True``, disables all supported autologging integrations. If ``False``, + enables all supported autologging integrations. .. code-block:: python :caption: Example diff --git a/mlflow/types/utils.py b/mlflow/types/utils.py index e187682dd2f6e..f91fe8e4c287f 100644 --- a/mlflow/types/utils.py +++ b/mlflow/types/utils.py @@ -1,8 +1,10 @@ from typing import Any +import warnings import numpy as np import pandas as pd + from mlflow.exceptions import MlflowException from mlflow.types import DataType from mlflow.types.schema import Schema, ColSpec @@ -54,11 +56,11 @@ def _infer_schema(data: Any) -> Schema: "Data in the dictionary must be 1-dimensional, " "got shape {}".format(ary.shape) ) - return Schema(res) + schema = Schema(res) elif isinstance(data, pd.Series): - return Schema([ColSpec(type=_infer_numpy_array(data.values))]) + schema = Schema([ColSpec(type=_infer_numpy_array(data.values))]) elif isinstance(data, pd.DataFrame): - return Schema( + schema = Schema( [ColSpec(type=_infer_numpy_array(data[col].values), name=col) for col in data.columns] ) elif isinstance(data, np.ndarray): @@ -68,25 +70,40 @@ def _infer_schema(data: Any) -> Schema: ) if data.dtype == np.object: data = pd.DataFrame(data).infer_objects() - return Schema( + schema = Schema( [ColSpec(type=_infer_numpy_array(data[col].values)) for col in data.columns] ) - if len(data.shape) == 1: - return Schema([ColSpec(type=_infer_numpy_dtype(data.dtype))]) + elif len(data.shape) == 1: + schema = Schema([ColSpec(type=_infer_numpy_dtype(data.dtype))]) elif len(data.shape) == 2: - return Schema([ColSpec(type=_infer_numpy_dtype(data.dtype))] * data.shape[1]) + schema = Schema([ColSpec(type=_infer_numpy_dtype(data.dtype))] * data.shape[1]) elif _is_spark_df(data): - return Schema( + schema = Schema( [ ColSpec(type=_infer_spark_type(field.dataType), name=field.name) for field in data.schema.fields ] ) - raise TypeError( - "Expected one of (pandas.DataFrame, numpy array, " - "dictionary of (name 
-> numpy.ndarray), pyspark.sql.DataFrame) " - "but got '{}'".format(type(data)) - ) + else: + raise TypeError( + "Expected one of (pandas.DataFrame, numpy array, " + "dictionary of (name -> numpy.ndarray), pyspark.sql.DataFrame) " + "but got '{}'".format(type(data)) + ) + if any([t in (DataType.integer, DataType.long) for t in schema.column_types()]): + warnings.warn( + "Hint: Inferred schema contains integer column(s). Integer columns in " + "Python can not represent missing values. If your input data contains" + "missing values at inference time, it will be encoded as floats and will " + "cause a schema enforcement error. The best way to avoid this problem is " + "to infer the model schema based on a realistic data sample (training " + "dataset) that includes missing values. Alternatively, you can declare " + "integer columns as doubles (float64) whenever these columns may have " + "missing values. See `Handling Integers With Missing Values " + "`_ for more details." + ) + return schema def _infer_numpy_dtype(dtype: np.dtype) -> DataType: diff --git a/mlflow/utils/autologging_utils.py b/mlflow/utils/autologging_utils.py index bc2c4d3b35c5a..2328d369401cb 100644 --- a/mlflow/utils/autologging_utils.py +++ b/mlflow/utils/autologging_utils.py @@ -1,4 +1,5 @@ import inspect +import itertools import functools import warnings import logging @@ -18,6 +19,7 @@ ENSURE_AUTOLOGGING_ENABLED_TEXT = ( "please ensure that autologging is enabled before constructing the dataset." ) +_AUTOLOGGING_TEST_MODE_ENV_VAR = "MLFLOW_AUTOLOGGING_TESTING" # Dict mapping integration name to its config. 
AUTOLOGGING_INTEGRATIONS = {} @@ -32,7 +34,10 @@ def try_mlflow_log(fn, *args, **kwargs): try: return fn(*args, **kwargs) except Exception as e: # pylint: disable=broad-except - warnings.warn("Logging to MLflow failed: " + str(e), stacklevel=2) + if _is_testing(): + raise + else: + warnings.warn("Logging to MLflow failed: " + str(e), stacklevel=2) def log_fn_args_as_params(fn, args, kwargs, unlogged=[]): # pylint: disable=W0102 @@ -372,7 +377,7 @@ def _is_testing(): """ import os - return os.environ.get("MLFLOW_AUTOLOGGING_TESTING", "false") == "true" + return os.environ.get(_AUTOLOGGING_TEST_MODE_ENV_VAR, "false") == "true" # Function attribute used for testing purposes to verify that a given function @@ -698,6 +703,19 @@ def _validate_new_input(inp): ) def _validate(autologging_call_input, user_call_input=None): + """ + Validates that the specified `autologging_call_input` and `user_call_input` + are compatible. If `user_call_input` is `None`, then `autologging_call_input` + is regarded as a new input added by autologging and is validated using + `_validate_new_input`. 
Otherwise, the following properties must hold: + + - `autologging_call_input` and `user_call_input` must have the same type + (referred to as "input type") + - if the input type is a tuple, list or dictionary, then `autologging_call_input` must + be equivalent to `user_call_input` or be a superset of `user_call_input` + - for all other input types, `autologging_call_input` and `user_call_input` + must be equivalent by reference equality or by object equality + """ if user_call_input is None and autologging_call_input is not None: _validate_new_input(autologging_call_input) return @@ -708,14 +726,16 @@ def _validate(autologging_call_input, user_call_input=None): type(autologging_call_input), type(user_call_input) ) - if type(autologging_call_input) == list: + if type(autologging_call_input) in [list, tuple]: length_difference = len(autologging_call_input) - len(user_call_input) assert length_difference >= 0, ( "{} expected inputs are missing from the call" " to the original function.".format(length_difference) ) - user_call_input = user_call_input + ([None] * (length_difference)) - for a, u in zip(autologging_call_input, user_call_input): + # If the autologging call input is longer than the user call input, `zip_longest` + # will pad the user call input with `None` values to ensure that the subsequent calls + # to `_validate` identify new inputs added by the autologging call + for a, u in itertools.zip_longest(autologging_call_input, user_call_input): _validate(a, u) elif type(autologging_call_input) == dict: assert set(user_call_input.keys()).issubset(set(autologging_call_input.keys())), ( diff --git a/mlflow/utils/validation.py b/mlflow/utils/validation.py index 6fdb6daed55f0..c7d6659bd5766 100644 --- a/mlflow/utils/validation.py +++ b/mlflow/utils/validation.py @@ -63,13 +63,24 @@ def _validate_metric_name(name): )
+ """ + # Note that `isinstance(bool_value, numbers.Number)` returns `True` because `bool` is a + # subclass of `int`. + return not isinstance(value, bool) and isinstance(value, numbers.Number) + + def _validate_metric(key, value, timestamp, step): """ Check that a param with the specified key, value, timestamp is valid and raise an exception if it isn't. """ _validate_metric_name(key) - if not isinstance(value, numbers.Number): + # value must be a Number + # since bool is an instance of Number check for bool additionally + if not _is_numeric(value): raise MlflowException( "Got invalid value %s for metric '%s' (timestamp=%s). Please specify value as a valid " "double (64-bit floating point)" % (value, key, timestamp), diff --git a/mlflow/xgboost.py b/mlflow/xgboost.py index c688e37c02bb8..22552992f9b87 100644 --- a/mlflow/xgboost.py +++ b/mlflow/xgboost.py @@ -32,15 +32,16 @@ from mlflow.models.signature import ModelSignature from mlflow.models.utils import _save_example from mlflow.tracking.artifact_utils import _download_artifact_from_uri -from mlflow.utils import gorilla from mlflow.utils.environment import _mlflow_conda_env from mlflow.utils.model_utils import _get_flavor_configuration from mlflow.exceptions import MlflowException from mlflow.utils.annotations import experimental from mlflow.utils.autologging_utils import ( + autologging_integration, + safe_patch, + exception_safe_function, try_mlflow_log, log_fn_args_as_params, - wrap_patch, INPUT_EXAMPLE_SAMPLE_ROWS, resolve_input_example_and_signature, _InputExampleInfo, @@ -283,11 +284,16 @@ def predict(self, dataframe): @experimental +@autologging_integration(FLAVOR_NAME) def autolog( - importance_types=None, log_input_examples=False, log_model_signatures=True, log_models=True, -): + importance_types=None, + log_input_examples=False, + log_model_signatures=True, + log_models=True, + disable=False, +): # pylint: disable=W0102,unused-argument """ - Enables automatic logging from XGBoost to MLflow. 
Logs the following. + Enables (or disables) and configures autologging from XGBoost to MLflow. Logs the following: - parameters specified in `xgboost.train`_. - metrics on each iteration (if ``evals`` specified). @@ -316,6 +322,8 @@ def autolog( If ``False``, trained models are not logged. Input examples and model signatures, which are attributes of MLflow models, are also omitted when ``log_models`` is ``False``. + :param disable: If ``True``, disables all supported autologging integrations. If ``False``, + enables all supported autologging integrations. """ import xgboost import numpy as np @@ -327,9 +335,8 @@ def autolog( # to use as an input example and for inferring the model signature. # (there is no way to get the data back from a DMatrix object) # We store it on the DMatrix object so the train function is able to read it. - def __init__(self, *args, **kwargs): + def __init__(original, self, *args, **kwargs): data = args[0] if len(args) > 0 else kwargs.get("data") - original = gorilla.get_original_attribute(xgboost.DMatrix, "__init__") if data is not None: try: @@ -348,24 +355,19 @@ def __init__(self, *args, **kwargs): original(self, *args, **kwargs) - def train(*args, **kwargs): + def train(original, *args, **kwargs): def record_eval_results(eval_results, metrics_logger): """ Create a callback function that records evaluation results. """ + @exception_safe_function def callback(env): metrics_logger.record_metrics(dict(env.evaluation_result_list), env.iteration) eval_results.append(dict(env.evaluation_result_list)) return callback - if not mlflow.active_run(): - try_mlflow_log(mlflow.start_run) - auto_end_run = True - else: - auto_end_run = False - def log_feature_importance_plot(features, importance, importance_type): """ Log feature importance plot. 
@@ -403,8 +405,6 @@ def log_feature_importance_plot(features, importance, importance_type): plt.close(fig) shutil.rmtree(tmpdir) - original = gorilla.get_original_attribute(xgboost, "train") - # logging booster params separately via mlflow.log_params to extract key/value pairs # and make it easier to compare them across runs. params = args[0] if len(args) > 0 else kwargs["params"] @@ -518,9 +518,7 @@ def infer_model_signature(input_example): input_example=input_example, ) - if auto_end_run: - try_mlflow_log(mlflow.end_run) return model - wrap_patch(xgboost, "train", train) - wrap_patch(xgboost.DMatrix, "__init__", __init__) + safe_patch(FLAVOR_NAME, xgboost, "train", train, manage_run=True) + safe_patch(FLAVOR_NAME, xgboost.DMatrix, "__init__", __init__) diff --git a/tests/autologging/__init__.py b/tests/autologging/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/tests/autologging/fixtures.py b/tests/autologging/fixtures.py new file mode 100644 index 0000000000000..e42c7e98f2de7 --- /dev/null +++ b/tests/autologging/fixtures.py @@ -0,0 +1,20 @@ +import pytest +from unittest import mock + +import mlflow.utils.autologging_utils as autologging_utils + + +@pytest.fixture +def test_mode_off(): + with mock.patch("mlflow.utils.autologging_utils._is_testing") as testing_mock: + testing_mock.return_value = False + assert not autologging_utils._is_testing() + yield + + +@pytest.fixture +def test_mode_on(): + with mock.patch("mlflow.utils.autologging_utils._is_testing") as testing_mock: + testing_mock.return_value = True + assert autologging_utils._is_testing() + yield diff --git a/tests/autologging/test_autologging_safety_integration.py b/tests/autologging/test_autologging_safety_integration.py new file mode 100644 index 0000000000000..2a763e615b523 --- /dev/null +++ b/tests/autologging/test_autologging_safety_integration.py @@ -0,0 +1,65 @@ +# pylint: disable=unused-argument + +import importlib +import pytest +from unittest import mock + 
+import mlflow +from mlflow.utils import gorilla +from mlflow.utils.autologging_utils import ( + safe_patch, + get_autologging_config, + autologging_is_disabled, +) + + +pytestmark = pytest.mark.large + + +AUTOLOGGING_INTEGRATIONS_TO_TEST = { + mlflow.sklearn: "sklearn", + mlflow.keras: "keras", + mlflow.xgboost: "xgboost", +} + + +@pytest.fixture(autouse=True, scope="module") +def import_integration_libraries(): + for library_module in AUTOLOGGING_INTEGRATIONS_TO_TEST.values(): + importlib.import_module(library_module) + + +@pytest.fixture(autouse=True) +def disable_autologging_at_test_end(): + for integration in AUTOLOGGING_INTEGRATIONS_TO_TEST: + integration.autolog(disable=True) + + +def test_autologging_integrations_expose_configs_and_support_disablement(): + for integration in AUTOLOGGING_INTEGRATIONS_TO_TEST: + integration.autolog(disable=False) + + assert not autologging_is_disabled(integration.FLAVOR_NAME) + assert not get_autologging_config(integration.FLAVOR_NAME, "disable", True) + + integration.autolog(disable=True) + + assert autologging_is_disabled(integration.FLAVOR_NAME) + assert get_autologging_config(integration.FLAVOR_NAME, "disable", False) + + +def test_autologging_integrations_use_safe_patch_for_monkey_patching(): + for integration in AUTOLOGGING_INTEGRATIONS_TO_TEST: + with mock.patch( + "mlflow.utils.gorilla.apply", wraps=gorilla.apply + ) as gorilla_mock, mock.patch( + integration.__name__ + ".safe_patch", wraps=safe_patch + ) as safe_patch_mock: + integration.autolog(disable=False) + assert safe_patch_mock.call_count > 0 + # `safe_patch` leverages `gorilla.apply` in its implementation. Accordingly, we expect + # that the total number of `gorilla.apply` calls to be equivalent to the number of + # `safe_patch` calls. 
This verifies that autologging integrations are leveraging + # `safe_patch`, rather than calling `gorilla.apply` directly (which does not provide + # exception safety properties) + assert safe_patch_mock.call_count == gorilla_mock.call_count diff --git a/tests/autologging/test_autologging_safety_unit.py b/tests/autologging/test_autologging_safety_unit.py index 0beaa05ef8f5f..e51ac577836fb 100644 --- a/tests/autologging/test_autologging_safety_unit.py +++ b/tests/autologging/test_autologging_safety_unit.py @@ -19,21 +19,29 @@ with_managed_run, _validate_args, _is_testing, + try_mlflow_log, ) +from tests.autologging.fixtures import test_mode_off, test_mode_on # pylint: disable=unused-import + pytestmark = pytest.mark.large -@pytest.fixture -def test_mode_on(): - with mock.patch("mlflow.utils.autologging_utils._is_testing") as testing_mock: - testing_mock.return_value = True - assert autologging_utils._is_testing() - yield +PATCH_DESTINATION_FN_DEFAULT_RESULT = "original_result" -PATCH_DESTINATION_FN_DEFAULT_RESULT = "original_result" +@pytest.fixture(autouse=True) +def turn_test_mode_off_by_default(test_mode_off): + """ + Most of the unit test cases in this module assume that autologging APIs are operating in a + standard execution mode (i.e. where test mode is disabled). Accordingly, we turn off autologging + test mode for this test module by default. Test cases that verify behaviors specific to test + mode enable test mode explicitly by specifying the `test_mode_on` fixture. + + For more information about autologging test mode, see the docstring for + :py:func:`mlflow.utils.autologging_utils._is_testing()`. 
+ """ @pytest.fixture @@ -215,8 +223,9 @@ def patch_impl(original, *args, **kwargs): assert formatting_arg2 == exc_to_throw +@pytest.mark.usefixtures(test_mode_on.__name__) def test_safe_patch_propagates_exceptions_raised_outside_of_original_function_in_test_mode( - patch_destination, test_autologging_integration, test_mode_on + patch_destination, test_autologging_integration ): exc_to_throw = Exception("Bad patch implementation") @@ -305,11 +314,12 @@ def patch_impl(original, *args, **kwargs): assert patch_impl_called +@pytest.mark.usefixtures(test_mode_on.__name__) def test_safe_patch_validates_arguments_to_original_function_in_test_mode( - patch_destination, test_autologging_integration, test_mode_on + patch_destination, test_autologging_integration ): def patch_impl(original, *args, **kwargs): - return original(1, 2, 3) + return original("1", "2", "3") safe_patch(test_autologging_integration, patch_destination, "fn", patch_impl) @@ -428,7 +438,8 @@ def throwing_function(): assert formatting_arg == exc_to_throw -def test_exception_safe_function_exhibits_expected_behavior_in_test_mode(test_mode_on,): +@pytest.mark.usefixtures(test_mode_on.__name__) +def test_exception_safe_function_exhibits_expected_behavior_in_test_mode(): assert autologging_utils._is_testing() @exception_safe_function @@ -474,7 +485,8 @@ def function(self): assert formatting_arg == exc_to_throw -def test_exception_safe_class_exhibits_expected_behavior_in_test_mode(test_mode_on,): +@pytest.mark.usefixtures(test_mode_on.__name__) +def test_exception_safe_class_exhibits_expected_behavior_in_test_mode(): assert autologging_utils._is_testing() class NonThrowingClass(metaclass=ExceptionSafeClass): @@ -640,8 +652,9 @@ def _on_exception(self, exception): assert RunStatus.from_string(status2) == RunStatus.FINISHED -def test_validate_args_succeeds_when_arg_sets_are_equivalent_or_identical(test_mode_on,): - args = [1, "b", ["c"]] +@pytest.mark.usefixtures(test_mode_on.__name__) +def 
test_validate_args_succeeds_when_arg_sets_are_equivalent_or_identical(): + args = (1, "b", ["c"]) kwargs = { "foo": ["bar"], "biz": {"baz": 5}, @@ -659,8 +672,9 @@ def test_validate_args_succeeds_when_arg_sets_are_equivalent_or_identical(test_m _validate_args(None, kwargs, None, kwargs_copy) -def test_validate_args_throws_when_extra_args_are_not_functions_classes_or_lists(test_mode_on,): - user_call_args = [1, "b", ["c"]] +@pytest.mark.usefixtures(test_mode_on.__name__) +def test_validate_args_throws_when_extra_args_are_not_functions_classes_or_lists(): + user_call_args = (1, "b", ["c"]) user_call_kwargs = { "foo": ["bar"], "biz": {"baz": 5}, @@ -682,8 +696,9 @@ def test_validate_args_throws_when_extra_args_are_not_functions_classes_or_lists ) -def test_validate_args_throws_when_extra_args_are_not_exception_safe(test_mode_on,): - user_call_args = [1, "b", ["c"]] +@pytest.mark.usefixtures(test_mode_on.__name__) +def test_validate_args_throws_when_extra_args_are_not_exception_safe(): + user_call_args = (1, "b", ["c"]) user_call_kwargs = { "foo": ["bar"], "biz": {"baz": 5}, @@ -693,7 +708,7 @@ class Unsafe: pass unsafe_autologging_call_args = copy.deepcopy(user_call_args) - unsafe_autologging_call_args.append(lambda: "foo") + unsafe_autologging_call_args += (lambda: "foo",) unsafe_autologging_call_kwargs1 = copy.deepcopy(user_call_kwargs) unsafe_autologging_call_kwargs1["foo"].append(Unsafe()) @@ -716,10 +731,9 @@ class Unsafe: ) -def test_validate_args_succeeds_when_extra_args_are_exception_safe_functions_or_classes( - test_mode_on, -): - user_call_args = [1, "b", ["c"]] +@pytest.mark.usefixtures(test_mode_on.__name__) +def test_validate_args_succeeds_when_extra_args_are_exception_safe_functions_or_classes(): + user_call_args = (1, "b", ["c"]) user_call_kwargs = { "foo": ["bar"], } @@ -729,7 +743,7 @@ class Safe(metaclass=ExceptionSafeClass): autologging_call_args = copy.deepcopy(user_call_args) autologging_call_args[2].append(Safe()) - 
autologging_call_args.append(exception_safe_function(lambda: "foo")) + autologging_call_args += (exception_safe_function(lambda: "foo"),) autologging_call_kwargs = copy.deepcopy(user_call_kwargs) autologging_call_kwargs["foo"].append(exception_safe_function(lambda: "foo")) @@ -738,8 +752,9 @@ class Safe(metaclass=ExceptionSafeClass): _validate_args(user_call_args, user_call_kwargs, autologging_call_args, autologging_call_kwargs) -def test_validate_args_throws_when_args_are_omitted(test_mode_on,): - user_call_args = [1, "b", ["c"], {"d": "e"}] +@pytest.mark.usefixtures(test_mode_on.__name__) +def test_validate_args_throws_when_args_are_omitted(): + user_call_args = (1, "b", ["c"], {"d": "e"}) user_call_kwargs = { "foo": ["bar"], "biz": {"baz": 4, "fuzz": 5}, @@ -790,14 +805,15 @@ def test_validate_args_throws_when_args_are_omitted(test_mode_on,): ) -def test_validate_args_throws_when_arg_types_or_values_are_changed(test_mode_on,): - user_call_args = [1, "b", ["c"]] +@pytest.mark.usefixtures(test_mode_on.__name__) +def test_validate_args_throws_when_arg_types_or_values_are_changed(): + user_call_args = (1, "b", ["c"]) user_call_kwargs = { "foo": ["bar"], } invalid_autologging_call_args_1 = copy.deepcopy(user_call_args) - invalid_autologging_call_args_1[0] = 2 + invalid_autologging_call_args_1 = (2,) + invalid_autologging_call_args_1[1:] invalid_autologging_call_kwargs_1 = copy.deepcopy(user_call_kwargs) invalid_autologging_call_kwargs_1["foo"] = ["biz"] @@ -811,8 +827,8 @@ def test_validate_args_throws_when_arg_types_or_values_are_changed(test_mode_on, user_call_args, user_call_kwargs, user_call_args, invalid_autologging_call_kwargs_1 ) - invalid_autologging_call_args_2 = copy.deepcopy(user_call_args) - invalid_autologging_call_args_2[1] = {"7": 1} + call_arg_1, call_arg_2, _ = copy.deepcopy(user_call_args) + invalid_autologging_call_args_2 = ({"7": 1}, call_arg_1, call_arg_2) invalid_autologging_call_kwargs_2 = copy.deepcopy(user_call_kwargs) 
invalid_autologging_call_kwargs_2["foo"] = 8 @@ -825,3 +841,24 @@ def test_validate_args_throws_when_arg_types_or_values_are_changed(test_mode_on, _validate_args( user_call_args, user_call_kwargs, user_call_args, invalid_autologging_call_kwargs_2 ) + + +def test_try_mlflow_log_emits_exceptions_as_warnings_in_standard_mode(): + assert not autologging_utils._is_testing() + + def throwing_function(): + raise Exception("bad implementation") + + with pytest.warns(UserWarning, match="bad implementation"): + try_mlflow_log(throwing_function) + + +@pytest.mark.usefixtures(test_mode_on.__name__) +def test_try_mlflow_log_propagates_exceptions_in_test_mode(): + assert autologging_utils._is_testing() + + def throwing_function(): + raise Exception("bad implementation") + + with pytest.raises(Exception, match="bad implementation"): + try_mlflow_log(throwing_function) diff --git a/tests/autologging/test_autologging_utils.py b/tests/autologging/test_autologging_utils.py index ce78d4b550559..e80cf08228e8a 100644 --- a/tests/autologging/test_autologging_utils.py +++ b/tests/autologging/test_autologging_utils.py @@ -23,6 +23,9 @@ from mlflow.utils.autologging_utils import AUTOLOGGING_INTEGRATIONS +from tests.autologging.fixtures import test_mode_off + + pytestmark = pytest.mark.large @@ -142,17 +145,26 @@ def new_add(self, *args, **kwargs): assert Math().add(1, 2) == 6 +def sample_function_to_patch(a, b): + return a + b + + def test_wrap_patch_with_module(): - def new_log_param(key, value): + import sys + + this_module = sys.modules[__name__] + + def new_sample_function(a, b): """new mlflow.log_param""" - return (key, value) + return a - b - before = get_func_attrs(mlflow.log_param) - wrap_patch(mlflow, mlflow.log_param.__name__, new_log_param) - after = get_func_attrs(mlflow.log_param) + before_attrs = get_func_attrs(mlflow.log_param) + assert sample_function_to_patch(10, 5) == 15 - assert after == before - assert mlflow.log_param("foo", "bar") == ("foo", "bar") + 
wrap_patch(this_module, sample_function_to_patch.__name__, new_sample_function) + after_attrs = get_func_attrs(mlflow.log_param) + assert after_attrs == before_attrs + assert sample_function_to_patch(10, 5) == 5 @pytest.fixture() @@ -371,6 +383,7 @@ def test_batch_metrics_logger_logs_timestamps_as_int_milliseconds(start_run,): assert logged_metric.timestamp == 123456 +@pytest.mark.usefixtures(test_mode_off.__name__) def test_batch_metrics_logger_continues_if_log_batch_fails(start_run,): with mock.patch.object(MlflowClient, "log_batch") as log_batch_mock: log_batch_mock.side_effect = [Exception("asdf"), None] @@ -458,13 +471,13 @@ def autolog(foo="bar", t=7, disable=False): # Before `autolog()` has been invoked, config values should not be available assert get_autologging_config("test_integration_for_config", "foo") is None assert get_autologging_config("test_integration_for_config", "disable") is None - assert get_autologging_config("test_integration_for_config", "t", 10) is 10 + assert get_autologging_config("test_integration_for_config", "t", 10) == 10 autolog() assert get_autologging_config("test_integration_for_config", "foo") == "bar" assert get_autologging_config("test_integration_for_config", "disable") is False - assert get_autologging_config("test_integration_for_config", "t", 10) is 7 + assert get_autologging_config("test_integration_for_config", "t", 10) == 7 assert get_autologging_config("test_integration_for_config", "nonexistent") is None autolog(foo="baz") diff --git a/tests/conftest.py b/tests/conftest.py index fa4407762958d..5bdfd04b32de8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,6 +3,10 @@ import pytest import mlflow +from mlflow.utils.autologging_utils import ( + _is_testing, + _AUTOLOGGING_TEST_MODE_ENV_VAR, +) from mlflow.utils.file_utils import path_to_local_sqlite_uri @@ -33,3 +37,22 @@ def tracking_uri_mock(tmpdir, request): mlflow.set_tracking_uri(None) if "notrackingurimock" not in request.keywords: del 
os.environ["MLFLOW_TRACKING_URI"] + + +@pytest.fixture(autouse=True, scope="session") +def enable_test_mode_by_default_for_autologging_integrations(): + """ + Run all MLflow tests in autologging test mode, ensuring that errors in autologging patch code + are raised and detected. For more information about autologging test mode, see the docstring + for :py:func:`mlflow.utils.autologging_utils._is_testing()`. + """ + try: + prev_env_var_value = os.environ.pop(_AUTOLOGGING_TEST_MODE_ENV_VAR, None) + os.environ[_AUTOLOGGING_TEST_MODE_ENV_VAR] = "true" + assert _is_testing() + yield + finally: + if prev_env_var_value: + os.environ[_AUTOLOGGING_TEST_MODE_ENV_VAR] = prev_env_var_value + else: + del os.environ[_AUTOLOGGING_TEST_MODE_ENV_VAR] diff --git a/tests/fastai/test_fastai_autolog.py b/tests/fastai/test_fastai_autolog.py index 34b0d2c4bdcec..7e2dd182d0237 100644 --- a/tests/fastai/test_fastai_autolog.py +++ b/tests/fastai/test_fastai_autolog.py @@ -177,20 +177,18 @@ def test_fastai_autolog_model_can_load_from_artifact(fastai_random_data_run): def fastai_random_data_run_with_callback(iris_data, fit_variant, manual_run, callback, patience): # pylint: disable=unused-argument mlflow.fastai.autolog() - callbacks = [] - if callback == "early": - # min_delta is set as such to guarantee early stopping - callbacks.append( - lambda learn: EarlyStoppingCallback(learn, patience=patience, min_delta=MIN_DELTA) - ) + model = fastai_model(iris_data) - model = fastai_model(iris_data, callback_fns=callbacks) + callbacks = [] + if callback == "early": + callback = EarlyStoppingCallback(learn=model, patience=patience, min_delta=MIN_DELTA) + callbacks.append(callback) if fit_variant == "fit_one_cycle": - model.fit_one_cycle(NUM_EPOCHS) + model.fit_one_cycle(NUM_EPOCHS, callbacks=callbacks) else: - model.fit(NUM_EPOCHS) + model.fit(NUM_EPOCHS, callbacks=callbacks) client = mlflow.tracking.MlflowClient() return model, client.get_run(client.list_run_infos(experiment_id="0")[0].run_id) diff 
--git a/tests/gluon/test_gluon_model_export.py b/tests/gluon/test_gluon_model_export.py index 214662c320a74..b56a381044e44 100644 --- a/tests/gluon/test_gluon_model_export.py +++ b/tests/gluon/test_gluon_model_export.py @@ -14,7 +14,6 @@ from mxnet.gluon.data import DataLoader from mxnet.gluon.loss import SoftmaxCrossEntropyLoss from mxnet.gluon.nn import HybridSequential, Dense -from mxnet.metric import Accuracy import mlflow import mlflow.gluon @@ -29,6 +28,11 @@ from tests.helper_functions import pyfunc_serve_and_score_model +if LooseVersion(mx.__version__) >= LooseVersion("2.0.0"): + from mxnet.gluon.metric import Accuracy # pylint: disable=import-error +else: + from mxnet.metric import Accuracy # pylint: disable=import-error + @pytest.fixture def model_path(tmpdir): diff --git a/tests/gluon_autolog/test_gluon_autolog.py b/tests/gluon_autolog/test_gluon_autolog.py index 70730c28e6cf9..ee6b463cad3cf 100644 --- a/tests/gluon_autolog/test_gluon_autolog.py +++ b/tests/gluon_autolog/test_gluon_autolog.py @@ -11,13 +11,17 @@ from mxnet.gluon.data import Dataset, DataLoader from mxnet.gluon.loss import SoftmaxCrossEntropyLoss from mxnet.gluon.nn import HybridSequential, Dense -from mxnet.metric import Accuracy import mlflow import mlflow.gluon from mlflow.utils.autologging_utils import BatchMetricsLogger from unittest.mock import patch +if LooseVersion(mx.__version__) >= LooseVersion("2.0.0"): + from mxnet.gluon.metric import Accuracy # pylint: disable=import-error +else: + from mxnet.metric import Accuracy # pylint: disable=import-error + class LogsDataset(Dataset): def __init__(self): diff --git a/tests/keras/test_keras_model_export.py b/tests/keras/test_keras_model_export.py index 230a9c49b7b1d..a1c37f7c279ff 100644 --- a/tests/keras/test_keras_model_export.py +++ b/tests/keras/test_keras_model_export.py @@ -183,7 +183,12 @@ class FakeKerasModule(object): @staticmethod def load_model(file, **kwargs): # pylint: disable=unused-argument - return 
MyModel(file.get("x").value) + + # `Dataset.value` was removed in `h5py == 3.0.0` + if LooseVersion(h5py.__version__) >= LooseVersion("3.0.0"): + return MyModel(file.get("x")[()].decode("utf-8")) + else: + return MyModel(file.get("x").value) original_import = importlib.import_module diff --git a/tests/pyfunc/test_model_export_with_loader_module_and_data_path.py b/tests/pyfunc/test_model_export_with_loader_module_and_data_path.py index d61f49283a185..8d4e3576a6797 100644 --- a/tests/pyfunc/test_model_export_with_loader_module_and_data_path.py +++ b/tests/pyfunc/test_model_export_with_loader_module_and_data_path.py @@ -182,39 +182,75 @@ def predict(pdf): assert res.dtypes.to_dict() == expected_types pdf["b"] = pdf["b"].astype(np.int64) - # 3. double -> float raises + # 3. unsigned int -> long works + pdf["b"] = pdf["b"].astype(np.uint32) + res = pyfunc_model.predict(pdf) + assert all((res == pdf[input_schema.column_names()]).all()) + assert res.dtypes.to_dict() == expected_types + pdf["b"] = pdf["b"].astype(np.int64) + + # 4. unsigned int -> int raises + pdf["a"] = pdf["a"].astype(np.uint32) + with pytest.raises(MlflowException) as ex: + pyfunc_model.predict(pdf) + assert "Incompatible input types" in str(ex) + pdf["a"] = pdf["a"].astype(np.int32) + + # 5. double -> float raises pdf["c"] = pdf["c"].astype(np.float64) with pytest.raises(MlflowException) as ex: pyfunc_model.predict(pdf) assert "Incompatible input types" in str(ex) pdf["c"] = pdf["c"].astype(np.float32) - # 4. float -> double works + # 6. 
float -> double works, double -> float does not pdf["d"] = pdf["d"].astype(np.float32) res = pyfunc_model.predict(pdf) assert res.dtypes.to_dict() == expected_types assert "Incompatible input types" in str(ex) - pdf["d"] = pdf["d"].astype(np.int64) + pdf["d"] = pdf["d"].astype(np.float64) + pdf["c"] = pdf["c"].astype(np.float64) + with pytest.raises(MlflowException) as ex: + pyfunc_model.predict(pdf) + assert "Incompatible input types" in str(ex) + pdf["c"] = pdf["c"].astype(np.float32) - # 5. floats -> ints raises + # 7. int -> float raises pdf["c"] = pdf["c"].astype(np.int32) with pytest.raises(MlflowException) as ex: pyfunc_model.predict(pdf) assert "Incompatible input types" in str(ex) pdf["c"] = pdf["c"].astype(np.float32) + # 8. int -> double works + pdf["d"] = pdf["d"].astype(np.int32) + pyfunc_model.predict(pdf) + assert all((res == pdf[input_schema.column_names()]).all()) + assert res.dtypes.to_dict() == expected_types + + # 9. long -> double raises pdf["d"] = pdf["d"].astype(np.int64) with pytest.raises(MlflowException) as ex: pyfunc_model.predict(pdf) assert "Incompatible input types" in str(ex) pdf["d"] = pdf["d"].astype(np.float64) - # 6. ints -> floats raises + # 10. any float -> any int raises pdf["a"] = pdf["a"].astype(np.float32) with pytest.raises(MlflowException) as ex: pyfunc_model.predict(pdf) assert "Incompatible input types" in str(ex) + # 10. 
any float -> any int raises + pdf["a"] = pdf["a"].astype(np.float64) + with pytest.raises(MlflowException) as ex: + pyfunc_model.predict(pdf) + assert "Incompatible input types" in str(ex) pdf["a"] = pdf["a"].astype(np.int32) + pdf["b"] = pdf["b"].astype(np.float64) + with pytest.raises(MlflowException) as ex: + pyfunc_model.predict(pdf) + assert "Incompatible input types" in str(ex) + pdf["b"] = pdf["b"].astype(np.int64) pdf["b"] = pdf["b"].astype(np.float64) with pytest.raises(MlflowException) as ex: @@ -222,7 +258,7 @@ def predict(pdf): pdf["b"] = pdf["b"].astype(np.int64) assert "Incompatible input types" in str(ex) - # 7. objects work + # 11. objects work pdf["b"] = pdf["b"].astype(np.object) pdf["d"] = pdf["d"].astype(np.object) pdf["e"] = pdf["e"].astype(np.object) @@ -232,6 +268,34 @@ def predict(pdf): assert res.dtypes.to_dict() == expected_types +def test_missing_value_hint_is_displayed_when_it_should(): + class TestModel(object): + @staticmethod + def predict(pdf): + return pdf + + m = Model() + input_schema = Schema([ColSpec("integer", "a")]) + m.signature = ModelSignature(inputs=input_schema) + pyfunc_model = PyFuncModel(model_meta=m, model_impl=TestModel()) + pdf = pd.DataFrame(data=[[1], [None]], columns=["a"],) + with pytest.raises(MlflowException) as ex: + pyfunc_model.predict(pdf) + hint = "Hint: the type mismatch is likely caused by missing values." 
+ assert "Incompatible input types" in str(ex.value.message) + assert hint in str(ex.value.message) + pdf = pd.DataFrame(data=[[1.5], [None]], columns=["a"],) + with pytest.raises(MlflowException) as ex: + pyfunc_model.predict(pdf) + assert "Incompatible input types" in str(ex) + assert hint not in str(ex.value.message) + pdf = pd.DataFrame(data=[[1], [2]], columns=["a"], dtype=np.float64) + with pytest.raises(MlflowException) as ex: + pyfunc_model.predict(pdf) + assert "Incompatible input types" in str(ex.value.message) + assert hint not in str(ex.value.message) + + def test_schema_enforcement_no_col_names(): class TestModel(object): @staticmethod diff --git a/tests/pytorch/test_pytorch_autolog.py b/tests/pytorch/test_pytorch_autolog.py index 46c8975e7602d..98cc84d971c37 100644 --- a/tests/pytorch/test_pytorch_autolog.py +++ b/tests/pytorch/test_pytorch_autolog.py @@ -1,3 +1,5 @@ +from distutils.version import LooseVersion + import pytest import pytorch_lightning as pl import torch @@ -59,7 +61,10 @@ def test_pytorch_autolog_logs_expected_data(pytorch_model): # Testing optimizer parameters are logged assert "optimizer_name" in data.params - assert data.params["optimizer_name"] == "Adam" + + # In pytorch-lightning >= 1.1.0, optimizer names are prefixed with "Lightning". 
+ prefix = "Lightning" if LooseVersion(pl.__version__) >= LooseVersion("1.1.0") else "" + assert data.params["optimizer_name"] == prefix + "Adam" # Testing model_summary.txt is saved client = mlflow.tracking.MlflowClient() diff --git a/tests/sklearn/test_sklearn_autolog.py b/tests/sklearn/test_sklearn_autolog.py index d5d9d4a577ffa..cd54f06494bcb 100644 --- a/tests/sklearn/test_sklearn_autolog.py +++ b/tests/sklearn/test_sklearn_autolog.py @@ -2,7 +2,6 @@ import inspect from unittest import mock import os -import warnings import matplotlib.pyplot as plt import numpy as np import pandas as pd @@ -26,7 +25,6 @@ _truncate_dict, ) from mlflow.utils.mlflow_tags import MLFLOW_PARENT_RUN_ID -from mlflow.utils.autologging_utils import try_mlflow_log from mlflow.utils.validation import ( MAX_PARAMS_TAGS_PER_BATCH, MAX_METRICS_PER_BATCH, @@ -34,6 +32,8 @@ MAX_ENTITY_KEY_LENGTH, ) +from tests.autologging.fixtures import test_mode_off + FIT_FUNC_NAMES = ["fit", "fit_transform", "fit_predict"] TRAINING_SCORE = "training_score" ESTIMATOR_CLASS = "estimator_class" @@ -107,33 +107,6 @@ def fit_func_name(request): return request.param -@pytest.fixture(autouse=True, scope="function") -def force_try_mlflow_log_to_fail(request): - # autolog contains multiple `try_mlflow_log`. They unexpectedly allow tests that - # should fail to pass (without us noticing). To prevent that, temporarily turns - # warnings emitted by `try_mlflow_log` into errors. 
- if "disable_force_try_mlflow_log_to_fail" in request.keywords: - yield - else: - with warnings.catch_warnings(): - warnings.filterwarnings( - "error", message=r"^Logging to MLflow failed", category=UserWarning, - ) - yield - - -@pytest.mark.xfail(strict=True, raises=UserWarning) -def test_force_try_mlflow_log_to_fail(): - with mlflow.start_run(): - try_mlflow_log(lambda: 1 / 0) - - -@pytest.mark.disable_force_try_mlflow_log_to_fail -def test_no_force_try_mlflow_log_to_fail(): - with mlflow.start_run(): - try_mlflow_log(lambda: 1 / 0) - - def test_autolog_preserves_original_function_attributes(): def get_func_attrs(f): attrs = {} @@ -852,7 +825,7 @@ def test_parameter_search_handles_large_volume_of_metric_outputs(): assert len(child_run.data.metrics) >= metrics_size -@pytest.mark.disable_force_try_mlflow_log_to_fail +@pytest.mark.usefixtures(test_mode_off.__name__) @pytest.mark.parametrize( "failing_specialization", [ @@ -871,7 +844,7 @@ def test_autolog_does_not_throw_when_parameter_search_logging_fails(failing_spec mock_func.assert_called_once() -@pytest.mark.disable_force_try_mlflow_log_to_fail +@pytest.mark.usefixtures(test_mode_off.__name__) @pytest.mark.parametrize( "func_to_fail", ["mlflow.log_params", "mlflow.log_metric", "mlflow.set_tags", "mlflow.sklearn.log_model"], diff --git a/tests/spark_autologging/test_spark_datasource_autologging_order.py b/tests/spark_autologging/test_spark_datasource_autologging_order.py index 6f640bfb14004..5b9b23a445823 100644 --- a/tests/spark_autologging/test_spark_datasource_autologging_order.py +++ b/tests/spark_autologging/test_spark_datasource_autologging_order.py @@ -11,12 +11,16 @@ from pyspark.sql.types import StructType, IntegerType, StructField from tests.spark_autologging.utils import _get_or_create_spark_session -from tests.spark_autologging.utils import _assert_spark_data_logged +from tests.spark_autologging.utils import ( + _assert_spark_data_logged, + _assert_spark_data_not_logged, +) @pytest.mark.large -def 
test_enabling_autologging_before_spark_session_works(): - mlflow.spark.autolog() +@pytest.mark.parametrize("disable", [False, True]) +def test_enabling_autologging_before_spark_session_works(disable): + mlflow.spark.autolog(disable=disable) # creating spark session AFTER autolog was enabled spark_session = _get_or_create_spark_session() @@ -42,7 +46,10 @@ def test_enabling_autologging_before_spark_session_works(): time.sleep(1) run = mlflow.get_run(run_id) - _assert_spark_data_logged(run=run, path=filepath, data_format="csv") + if disable: + _assert_spark_data_not_logged(run=run) + else: + _assert_spark_data_logged(run=run, path=filepath, data_format="csv") shutil.rmtree(tempdir) spark_session.stop() diff --git a/tests/spark_autologging/test_spark_disable_autologging.py b/tests/spark_autologging/test_spark_disable_autologging.py new file mode 100644 index 0000000000000..781d03a394b94 --- /dev/null +++ b/tests/spark_autologging/test_spark_disable_autologging.py @@ -0,0 +1,113 @@ +import time + +import pytest + +import mlflow +import mlflow.spark + +from tests.tracking.test_rest_tracking import mlflow_client # pylint: disable=unused-import +from tests.spark_autologging.utils import ( + _assert_spark_data_logged, + _assert_spark_data_not_logged, +) +from tests.spark_autologging.utils import spark_session # pylint: disable=unused-import +from tests.spark_autologging.utils import format_to_file_path # pylint: disable=unused-import +from tests.spark_autologging.utils import data_format # pylint: disable=unused-import +from tests.spark_autologging.utils import file_path # pylint: disable=unused-import + + +# Note that the following tests run one-after-the-other and operate on the SAME spark_session +# (it is not reset between tests) + + +@pytest.mark.large +def test_autologging_disabled_logging_datasource_with_different_formats( + spark_session, format_to_file_path +): + mlflow.spark.autolog(disable=True) + for data_format, file_path in format_to_file_path.items(): + df = 
( + spark_session.read.format(data_format) + .option("header", "true") + .option("inferSchema", "true") + .load(file_path) + ) + + with mlflow.start_run(): + run_id = mlflow.active_run().info.run_id + df.collect() + time.sleep(1) + run = mlflow.get_run(run_id) + _assert_spark_data_not_logged(run=run) + + +@pytest.mark.large +def test_autologging_disabled_logging_with_or_without_active_run( + spark_session, format_to_file_path +): + mlflow.spark.autolog(disable=True) + data_format = list(format_to_file_path.keys())[0] + file_path = format_to_file_path[data_format] + df = ( + spark_session.read.format(data_format) + .option("header", "true") + .option("inferSchema", "true") + .load(file_path) + ) + + # Reading data source before starting a run + df.filter("number1 > 0").collect() + df.limit(2).collect() + df.collect() + + # If there was any tag info collected it will be logged here + with mlflow.start_run(): + run_id = mlflow.active_run().info.run_id + time.sleep(1) + + # Confirm nothing was logged. + run = mlflow.get_run(run_id) + _assert_spark_data_not_logged(run=run) + + # Reading data source during an active run + with mlflow.start_run(): + run_id = mlflow.active_run().info.run_id + df.collect() + time.sleep(1) + run = mlflow.get_run(run_id) + _assert_spark_data_not_logged(run=run) + + +@pytest.mark.large +def test_autologging_disabled_then_enabled(spark_session, format_to_file_path): + mlflow.spark.autolog(disable=True) + data_format = list(format_to_file_path.keys())[0] + file_path = format_to_file_path[data_format] + df = ( + spark_session.read.format(data_format) + .option("header", "true") + .option("inferSchema", "true") + .load(file_path) + ) + # Logging is disabled here. + with mlflow.start_run(): + run_id = mlflow.active_run().info.run_id + df.collect() + time.sleep(1) + run = mlflow.get_run(run_id) + _assert_spark_data_not_logged(run=run) + + # Logging is enabled here. 
+ mlflow.spark.autolog(disable=False) + with mlflow.start_run(): + run_id = mlflow.active_run().info.run_id + df.filter("number1 > 0").collect() + time.sleep(1) + run = mlflow.get_run(run_id) + _assert_spark_data_logged(run=run, path=file_path, data_format=data_format) + + +@pytest.mark.large +def test_enabling_autologging_does_not_throw_when_spark_hasnt_been_started(spark_session): + spark_session.stop() + mlflow.spark.autolog(disable=True) diff --git a/tests/spark_autologging/utils.py b/tests/spark_autologging/utils.py index fa7626916212f..f1e97e622cbfe 100644 --- a/tests/spark_autologging/utils.py +++ b/tests/spark_autologging/utils.py @@ -39,6 +39,10 @@ def _assert_spark_data_logged(run, path, data_format, version=None): assert table_info_tag == _get_expected_table_info_row(path, data_format, version) +def _assert_spark_data_not_logged(run): + assert _SPARK_TABLE_INFO_TAG_NAME not in run.data.tags + + def _get_or_create_spark_session(jars=None): jar_path = jars if jars is not None else _get_mlflow_spark_jar_path() return SparkSession.builder.config("spark.jars", jar_path).master("local[*]").getOrCreate() diff --git a/tests/test_flavors.py b/tests/test_flavors.py index 363f771836710..dfb274d7d7554 100644 --- a/tests/test_flavors.py +++ b/tests/test_flavors.py @@ -23,7 +23,8 @@ def is_model_flavor(src): def iter_flavor_names(): for root, _, files in os.walk("mlflow"): for f in files: - if not f.endswith(".py"): + is_private_module = f.startswith("_") and f != "__init__.py" + if not f.endswith(".py") or is_private_module: continue path = os.path.join(root, f) src = read_file(path) diff --git a/tests/utils/test_validation.py b/tests/utils/test_validation.py index 5f71d11ab012e..f35d6734f3245 100644 --- a/tests/utils/test_validation.py +++ b/tests/utils/test_validation.py @@ -5,6 +5,7 @@ from mlflow.entities import Metric, Param, RunTag from mlflow.protos.databricks_pb2 import ErrorCode, INVALID_PARAMETER_VALUE from mlflow.utils.validation import ( + _is_numeric, 
_validate_metric_name, _validate_param_name, _validate_tag_name, @@ -43,6 +44,15 @@ ] +def test_is_numeric(): + assert _is_numeric(0) + assert _is_numeric(0.0) + assert not _is_numeric(True) + assert not _is_numeric(False) + assert not _is_numeric("0") + assert not _is_numeric(None) + + def test_validate_metric_name(): for good_name in GOOD_METRIC_OR_PARAM_NAMES: _validate_metric_name(good_name) @@ -122,6 +132,7 @@ def test_validate_batch_log_data(): Metric("super-long-bad-key" * 1000, 4.0, 0, 0), ] metrics_with_bad_val = [Metric("good-metric-key", "not-a-double-val", 0, 0)] + metrics_with_bool_val = [Metric("good-metric-key", True, 0, 0)] metrics_with_bad_ts = [Metric("good-metric-key", 1.0, "not-a-timestamp", 0)] metrics_with_neg_ts = [Metric("good-metric-key", 1.0, -123, 0)] metrics_with_bad_step = [Metric("good-metric-key", 1.0, 0, "not-a-step")] @@ -145,6 +156,7 @@ def test_validate_batch_log_data(): "metrics": [ metrics_with_bad_key, metrics_with_bad_val, + metrics_with_bool_val, metrics_with_bad_ts, metrics_with_neg_ts, metrics_with_bad_step,