Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
bcd1ba3
Add public API for autologging integration configs
mohamad-arabi Dec 10, 2020
1c72efe
Prefix with Lightning (#3809)
harupy Dec 11, 2020
5a26446
[HOT FIX] skip `test` job if matrix ends up being empty in cross vers…
harupy Dec 11, 2020
f43e5b6
Allow using post releases in the cross version tests (#3807)
harupy Dec 11, 2020
3ce637f
Add support for disabling spark autologging
mohamad-arabi Dec 11, 2020
b865cce
Merge branch 'interf_proto' into disable-flag-spark
mohamad-arabi Dec 11, 2020
26da1b4
Remove unused method
mohamad-arabi Dec 11, 2020
b2d0771
Remove unused method part 2
mohamad-arabi Dec 11, 2020
fa21520
Introduce utilities for autologging error tolerance / safety (#3682)
dbczumar Dec 12, 2020
2826878
reject bool metric value (#3822)
HCoban Dec 14, 2020
09f4f24
Update schema enforcement (#3798)
tomasatdatabricks Dec 14, 2020
f2c854e
Fix `AttributeError: 'Dataset' object has no attribute 'value'` in h5…
harupy Dec 14, 2020
aec3be2
Add gluon to the cross version tests (#3826)
harupy Dec 14, 2020
26fed59
Fix invalid metric error in statsmodels tests (#3828)
harupy Dec 14, 2020
0be7352
Add fastai to the cross version tests (#3830)
harupy Dec 14, 2020
24211df
Add autologging safety utils to several autologging integrations (#3815)
dbczumar Dec 14, 2020
94bc305
add test case for before spark session
mohamad-arabi Dec 14, 2020
c538fdf
Merge branch master into disable-flag-spark
mohamad-arabi Dec 14, 2020
9eac976
unnecessary change
mohamad-arabi Dec 14, 2020
0ec7400
modify comment
mohamad-arabi Dec 14, 2020
4feeb11
cannot assign FLAVOR_NAME in _spark_autolgging.py
mohamad-arabi Dec 14, 2020
cf7a1a0
address final comments
mohamad-arabi Dec 14, 2020
a708ada
fix api documentation
mohamad-arabi Dec 14, 2020
bc59ab0
fix api documentation II
mohamad-arabi Dec 15, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/cross-version-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,15 @@ jobs:
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
is_matrix_empty: ${{ steps.set-matrix.outputs.is_matrix_empty }}
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: "3.6"
- name: Install dependencies
run: |
pip install pyyaml pytest
pip install packaging pyyaml pytest
- name: Test set_matrix.py
run: |
pytest dev/set_matrix.py --doctest-modules --verbose
Expand All @@ -40,6 +41,7 @@ jobs:
fi
test:
needs: set-matrix
if: ${{ needs.set-matrix.outputs.is_matrix_empty == 'false' }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
Expand Down
67 changes: 24 additions & 43 deletions dev/set_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"""

import argparse
from distutils.version import LooseVersion
from packaging.version import Version
import json
import operator
import os
Expand Down Expand Up @@ -104,35 +104,6 @@ def get_released_versions(package_name):
return versions


def get_major_version(ver):
    """Return the leading (major) component of a version string as an int.

    Examples
    --------
    >>> get_major_version("1.2.3")
    1
    """
    # LooseVersion.version is the parsed component list; the first entry
    # is the major version number.
    parsed = LooseVersion(ver)
    major, *_ = parsed.version
    return major


def is_final_release(ver):
    """Return True if the given version matches PEP 440's final release scheme.

    Examples
    --------
    >>> is_final_release("0.1")
    True
    >>> is_final_release("0.23.0")
    True
    >>> is_final_release("0.4.0a1")
    False
    >>> is_final_release("0.5.0rc")
    False
    """
    # Final releases are dotted runs of digits only (no pre/post/dev suffix).
    # Ref.: https://www.python.org/dev/peps/pep-0440/#final-releases
    final_release_pattern = r"^\d+(\.\d+)+$"
    return bool(re.search(final_release_pattern, ver))


def select_latest_micro_versions(versions):
"""
Selects the latest micro version in each minor version.
Expand All @@ -155,10 +126,10 @@ def select_latest_micro_versions(versions):
for ver, _ in sorted(
versions.items(),
# Sort by (minor_version, upload_time) in descending order
key=lambda x: (LooseVersion(x[0]).version[:2], x[1]),
key=lambda x: (Version(x[0]).release[:2], x[1]),
reverse=True,
):
minor_ver = tuple(LooseVersion(ver).version[:2]) # A set doesn't accept a list
minor_ver = Version(ver).release[:2]

if minor_ver not in seen_minors:
seen_minors.add(minor_ver)
Expand All @@ -171,9 +142,10 @@ def filter_versions(versions, min_ver, max_ver, excludes=None):
"""
Filter versions that satisfy the following conditions:

1. is newer than or equal to `min_ver`
2. shares the same major version as `max_ver` or `min_ver`
3. (Optional) is not in `excludes`
1. is a final or post release that PEP 440 defines
2. is newer than or equal to `min_ver`
3. shares the same major version as `max_ver` or `min_ver`
4. (Optional) is not in `excludes`

Examples
--------
Expand All @@ -198,12 +170,16 @@ def filter_versions(versions, min_ver, max_ver, excludes=None):
assert max_ver in versions
assert all(v in versions for v in excludes)

versions = {v: t for v, t in versions.items() if v not in excludes}
versions = {v: t for v, t in versions.items() if is_final_release(v)}
versions = {Version(v): t for v, t in versions.items() if v not in excludes}

max_major = get_major_version(max_ver)
versions = {v: t for v, t in versions.items() if get_major_version(v) <= max_major}
versions = {v: t for v, t in versions.items() if LooseVersion(v) >= LooseVersion(min_ver)}
def _is_final_or_post_release(v):
# final release: https://www.python.org/dev/peps/pep-0440/#final-releases
# post release: https://www.python.org/dev/peps/pep-0440/#post-releases
return (v.base_version == v.public) or (v.is_postrelease)

versions = {v: t for v, t in versions.items() if _is_final_or_post_release(v)}
versions = {v: t for v, t in versions.items() if v.major <= Version(max_ver).major}
versions = {str(v): t for v, t in versions.items() if v >= Version(min_ver)}

return versions

Expand Down Expand Up @@ -324,8 +300,7 @@ def process_requirements(requirements, version=None):
op_and_ver_pairs = map(get_operator_and_version, ver_spec.split(","))
match_all = all(
comp_op(
LooseVersion(version),
LooseVersion(dev_numeric if req_ver == DEV_VERSION else req_ver),
Version(version), Version(dev_numeric if req_ver == DEV_VERSION else req_ver),
)
for comp_op, req_ver in op_and_ver_pairs
)
Expand Down Expand Up @@ -475,7 +450,9 @@ def main():
)
diff_flavor = set(filter(lambda x: x["flavor"] in changed_flavors, matrix))

include = sorted(diff_config.union(diff_flavor), key=lambda x: x["job_name"])
# If this file contains changes, re-run all the tests, otherwise re-run the affected tests.
include = matrix if (__file__ in changed_files) else diff_config.union(diff_flavor)
include = sorted(include, key=lambda x: x["job_name"])
job_names = [x["job_name"] for x in include]

matrix = {"job_name": job_names, "include": include}
Expand All @@ -488,6 +465,10 @@ def main():
# Note that this actually doesn't print anything to the console.
print("::set-output name=matrix::{}".format(json.dumps(matrix)))

# Set a flag that indicates whether or not the matrix is empty. If this flag is 'true',
# skip the subsequent jobs.
print("::set-output name=is_matrix_empty::{}".format("false" if job_names else "true"))


if __name__ == "__main__":
main()
18 changes: 16 additions & 2 deletions docs/source/models.rst
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,22 @@ names, matching is done by position (i.e. MLflow will only check the number of c
Column Type Enforcement
"""""""""""""""""""""""
The input column types are checked against the signature. MLflow will perform safe type conversions
if necessary. Generally, only upcasts (e.g. integer -> long or float -> double) are considered to be
safe. If the types cannot be made compatible, MLflow will raise an error.
if necessary. Generally, only conversions that are guaranteed to be lossless are allowed. For
example, int -> long or int -> double conversions are ok, long -> double is not. If the types cannot
be made compatible, MLflow will raise an error.

Handling Integers With Missing Values
"""""""""""""""""""""""""""""""""""""
Integer data with missing values is typically represented as floats in Python. Therefore, data
types of integer columns in Python can vary depending on the data sample. This type variance can
cause schema enforcement errors at runtime since integer and float are not compatible types. For
example, if your training data did not have any missing values for integer column c, its type will
be integer. However, when you attempt to score a sample of the data that does include a missing
value in column c, its type will be float. If your model signature specified c to have integer type,
MLflow will raise an error since it cannot convert float to int. Note that MLflow uses Python to
serve models and to deploy models to Spark, so this can affect most model deployments. The best way
to avoid this problem is to declare integer columns as doubles (float64) whenever there can be
missing values.

How To Log Models With Signatures
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
36 changes: 36 additions & 0 deletions ml-package-versions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,39 @@ lightgbm:
requirements: ["scikit-learn", "matplotlib"]
run: |
pytest tests/lightgbm/test_lightgbm_autolog.py --large

gluon:
package_info:
pip_release: "mxnet"
install_dev: |
pip install --pre mxnet -f https://dist.mxnet.io/python/cpu

models:
minimum: "1.5.1"
maximum: "1.7.0.post1"
run: |
pytest tests/gluon/test_gluon_model_export.py --large

autologging:
minimum: "1.5.1"
maximum: "1.7.0.post1"
run: |
pytest tests/gluon_autolog/test_gluon_autolog.py --large

fastai-1.x:
package_info:
pip_release: "fastai"

models:
minimum: "1.0.60"
maximum: "1.0.61"
requirements: ["scikit-learn"]
run: |
pytest tests/fastai/test_fastai_model_export.py --large

autologging:
minimum: "1.0.60"
maximum: "1.0.61"
requirements: ["scikit-learn"]
run: |
pytest tests/fastai/test_fastai_autolog.py --large
11 changes: 10 additions & 1 deletion mlflow/_spark_autologging.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@
from mlflow.tracking.client import MlflowClient
from mlflow.tracking.context.abstract_context import RunContextProvider
from mlflow.utils import gorilla
from mlflow.utils.autologging_utils import wrap_patch
from mlflow.utils.autologging_utils import (
wrap_patch,
autologging_is_disabled,
)
from mlflow.spark import FLAVOR_NAME

_JAVA_PACKAGE = "org.mlflow.spark.autologging"
_SPARK_TABLE_INFO_TAG_NAME = "sparkDatasourceInfo"
Expand Down Expand Up @@ -217,6 +221,8 @@ def _notify(self, path, version, data_format):
Method called by Scala SparkListener to propagate datasource read events to the current
Python process
"""
if autologging_is_disabled(FLAVOR_NAME):
return
# If there's an active run, simply set the tag on it
# Note that there's a TOCTOU race condition here - active_run() here can actually throw
# if the main thread happens to end the run & pop from the active run stack after we check
Expand Down Expand Up @@ -248,6 +254,9 @@ def in_context(self):
return True

def tags(self):
# if autologging is disabled, then short circuit `tags()` and return empty dict.
if autologging_is_disabled(FLAVOR_NAME):
return {}
with _lock:
global _table_infos
seen = set()
Expand Down
7 changes: 6 additions & 1 deletion mlflow/gluon.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from distutils.version import LooseVersion
import os

import pandas as pd
Expand Down Expand Up @@ -48,6 +49,7 @@ def load_model(model_uri, ctx):
model = mlflow.gluon.load_model("runs:/" + gluon_random_data_run.info.run_id + "/model")
model(nd.array(np.random.rand(1000, 1, 32)))
"""
import mxnet
from mxnet import gluon
from mxnet import sym

Expand All @@ -58,7 +60,10 @@ def load_model(model_uri, ctx):
symbol = sym.load(model_arch_path)
inputs = sym.var("data", dtype="float32")
net = gluon.SymbolBlock(symbol, inputs)
net.collect_params().load(model_params_path, ctx)
if LooseVersion(mxnet.__version__) >= LooseVersion("2.0.0"):
net.load_parameters(model_params_path, ctx)
else:
net.collect_params().load(model_params_path, ctx)
return net


Expand Down
Loading