Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
bcd1ba3
Add public API for autologging integration configs
mohamad-arabi Dec 10, 2020
1c72efe
Prefix with Lightning (#3809)
harupy Dec 11, 2020
5a26446
[HOT FIX] skip `test` job if matrix ends up being empty in cross vers…
harupy Dec 11, 2020
f43e5b6
Allow using post releases in the cross version tests (#3807)
harupy Dec 11, 2020
3ce637f
Add support for disabling spark autologging
mohamad-arabi Dec 11, 2020
b865cce
Merge branch 'interf_proto' into disable-flag-spark
mohamad-arabi Dec 11, 2020
26da1b4
Remove unused method
mohamad-arabi Dec 11, 2020
b2d0771
Remove unused method part 2
mohamad-arabi Dec 11, 2020
fa21520
Introduce utilities for autologging error tolerance / safety (#3682)
dbczumar Dec 12, 2020
2826878
reject bool metric value (#3822)
HCoban Dec 14, 2020
09f4f24
Update schema enforcement (#3798)
tomasatdatabricks Dec 14, 2020
f2c854e
Fix `AttributeError: 'Dataset' object has no attribute 'value'` in h5…
harupy Dec 14, 2020
aec3be2
Add gluon to the cross version tests (#3826)
harupy Dec 14, 2020
26fed59
Fix invalid metric error in statsmodels tests (#3828)
harupy Dec 14, 2020
0be7352
Add fastai to the cross version tests (#3830)
harupy Dec 14, 2020
24211df
Add autologging safety utils to several autologging integrations (#3815)
dbczumar Dec 14, 2020
94bc305
add test case for before spark session
mohamad-arabi Dec 14, 2020
c538fdf
Merge branch master into disable-flag-spark
mohamad-arabi Dec 14, 2020
9eac976
unnecessary change
mohamad-arabi Dec 14, 2020
0ec7400
modify comment
mohamad-arabi Dec 14, 2020
4feeb11
cannot assign FLAVOR_NAME in _spark_autolgging.py
mohamad-arabi Dec 14, 2020
cf7a1a0
address final comments
mohamad-arabi Dec 14, 2020
a708ada
fix api documentation
mohamad-arabi Dec 14, 2020
bc59ab0
fix api documentation II
mohamad-arabi Dec 15, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/cross-version-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,15 @@ jobs:
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
is_matrix_empty: ${{ steps.set-matrix.outputs.is_matrix_empty }}
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: "3.6"
- name: Install dependencies
run: |
pip install pyyaml pytest
pip install packaging pyyaml pytest
- name: Test set_matrix.py
run: |
pytest dev/set_matrix.py --doctest-modules --verbose
Expand All @@ -40,6 +41,7 @@ jobs:
fi
test:
needs: set-matrix
if: ${{ needs.set-matrix.outputs.is_matrix_empty == 'false' }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
Expand Down
67 changes: 24 additions & 43 deletions dev/set_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"""

import argparse
from distutils.version import LooseVersion
from packaging.version import Version
import json
import operator
import os
Expand Down Expand Up @@ -104,35 +104,6 @@ def get_released_versions(package_name):
return versions


def get_major_version(ver):
    """Return the leading (major) component of a version string as an int.

    Examples
    --------
    >>> get_major_version("1.2.3")
    1
    """
    # LooseVersion.version is the parsed component list; the first entry
    # is the major version number.
    parsed = LooseVersion(ver)
    major, *_ = parsed.version
    return major


def is_final_release(ver):
    """Return True if the given version matches PEP 440's final release scheme.

    Examples
    --------
    >>> is_final_release("0.1")
    True
    >>> is_final_release("0.23.0")
    True
    >>> is_final_release("0.4.0a1")
    False
    >>> is_final_release("0.5.0rc")
    False
    """
    # Final releases are dotted runs of digits only (no pre/post/dev suffix).
    # Ref.: https://www.python.org/dev/peps/pep-0440/#final-releases
    final_release_pattern = r"^\d+(\.\d+)+$"
    return bool(re.search(final_release_pattern, ver))


def select_latest_micro_versions(versions):
"""
Selects the latest micro version in each minor version.
Expand All @@ -155,10 +126,10 @@ def select_latest_micro_versions(versions):
for ver, _ in sorted(
versions.items(),
# Sort by (minor_version, upload_time) in descending order
key=lambda x: (LooseVersion(x[0]).version[:2], x[1]),
key=lambda x: (Version(x[0]).release[:2], x[1]),
reverse=True,
):
minor_ver = tuple(LooseVersion(ver).version[:2]) # A set doesn't accept a list
minor_ver = Version(ver).release[:2]

if minor_ver not in seen_minors:
seen_minors.add(minor_ver)
Expand All @@ -171,9 +142,10 @@ def filter_versions(versions, min_ver, max_ver, excludes=None):
"""
Filter versions that satisfy the following conditions:

1. is newer than or equal to `min_ver`
2. shares the same major version as `max_ver` or `min_ver`
3. (Optional) is not in `excludes`
1. is a final or post release that PEP 440 defines
2. is newer than or equal to `min_ver`
3. shares the same major version as `max_ver` or `min_ver`
4. (Optional) is not in `excludes`

Examples
--------
Expand All @@ -198,12 +170,16 @@ def filter_versions(versions, min_ver, max_ver, excludes=None):
assert max_ver in versions
assert all(v in versions for v in excludes)

versions = {v: t for v, t in versions.items() if v not in excludes}
versions = {v: t for v, t in versions.items() if is_final_release(v)}
versions = {Version(v): t for v, t in versions.items() if v not in excludes}

max_major = get_major_version(max_ver)
versions = {v: t for v, t in versions.items() if get_major_version(v) <= max_major}
versions = {v: t for v, t in versions.items() if LooseVersion(v) >= LooseVersion(min_ver)}
def _is_final_or_post_release(v):
# final release: https://www.python.org/dev/peps/pep-0440/#final-releases
# post release: https://www.python.org/dev/peps/pep-0440/#post-releases
return (v.base_version == v.public) or (v.is_postrelease)

versions = {v: t for v, t in versions.items() if _is_final_or_post_release(v)}
versions = {v: t for v, t in versions.items() if v.major <= Version(max_ver).major}
versions = {str(v): t for v, t in versions.items() if v >= Version(min_ver)}

return versions

Expand Down Expand Up @@ -324,8 +300,7 @@ def process_requirements(requirements, version=None):
op_and_ver_pairs = map(get_operator_and_version, ver_spec.split(","))
match_all = all(
comp_op(
LooseVersion(version),
LooseVersion(dev_numeric if req_ver == DEV_VERSION else req_ver),
Version(version), Version(dev_numeric if req_ver == DEV_VERSION else req_ver),
)
for comp_op, req_ver in op_and_ver_pairs
)
Expand Down Expand Up @@ -475,7 +450,9 @@ def main():
)
diff_flavor = set(filter(lambda x: x["flavor"] in changed_flavors, matrix))

include = sorted(diff_config.union(diff_flavor), key=lambda x: x["job_name"])
# If this file contains changes, re-run all the tests, otherwise re-run the affected tests.
include = matrix if (__file__ in changed_files) else diff_config.union(diff_flavor)
include = sorted(include, key=lambda x: x["job_name"])
job_names = [x["job_name"] for x in include]

matrix = {"job_name": job_names, "include": include}
Expand All @@ -488,6 +465,10 @@ def main():
# Note that this actually doesn't print anything to the console.
print("::set-output name=matrix::{}".format(json.dumps(matrix)))

# Set a flag that indicates whether or not the matrix is empty. If this flag is 'true',
# skip the subsequent jobs.
print("::set-output name=is_matrix_empty::{}".format("false" if job_names else "true"))


if __name__ == "__main__":
main()
18 changes: 16 additions & 2 deletions docs/source/models.rst
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,22 @@ names, matching is done by position (i.e. MLflow will only check the number of c
Column Type Enforcement
"""""""""""""""""""""""
The input column types are checked against the signature. MLflow will perform safe type conversions
if necessary. Generally, only upcasts (e.g. integer -> long or float -> double) are considered to be
safe. If the types cannot be made compatible, MLflow will raise an error.
if necessary. Generally, only conversions that are guaranteed to be lossless are allowed. For
example, int -> long or int -> double conversions are ok, long -> double is not. If the types cannot
be made compatible, MLflow will raise an error.

Handling Integers With Missing Values
"""""""""""""""""""""""""""""""""""""
Integer data with missing values is typically represented as floats in Python. Therefore, data
types of integer columns in Python can vary depending on the data sample. This type variance can
cause schema enforcement errors at runtime since integer and float are not compatible types. For
example, if your training data did not have any missing values for integer column c, its type will
be integer. However, when you attempt to score a sample of the data that does include a missing
value in column c, its type will be float. If your model signature specified c to have integer type,
MLflow will raise an error since it cannot convert float to int. Note that MLflow uses Python to
serve models and to deploy models to Spark, so this can affect most model deployments. The best way
to avoid this problem is to declare integer columns as doubles (float64) whenever there can be
missing values.

How To Log Models With Signatures
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
36 changes: 36 additions & 0 deletions ml-package-versions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,39 @@ lightgbm:
requirements: ["scikit-learn", "matplotlib"]
run: |
pytest tests/lightgbm/test_lightgbm_autolog.py --large

gluon:
package_info:
pip_release: "mxnet"
install_dev: |
pip install --pre mxnet -f https://dist.mxnet.io/python/cpu

models:
minimum: "1.5.1"
maximum: "1.7.0.post1"
run: |
pytest tests/gluon/test_gluon_model_export.py --large

autologging:
minimum: "1.5.1"
maximum: "1.7.0.post1"
run: |
pytest tests/gluon_autolog/test_gluon_autolog.py --large

fastai-1.x:
package_info:
pip_release: "fastai"

models:
minimum: "1.0.60"
maximum: "1.0.61"
requirements: ["scikit-learn"]
run: |
pytest tests/fastai/test_fastai_model_export.py --large

autologging:
minimum: "1.0.60"
maximum: "1.0.61"
requirements: ["scikit-learn"]
run: |
pytest tests/fastai/test_fastai_autolog.py --large
11 changes: 10 additions & 1 deletion mlflow/_spark_autologging.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@
from mlflow.tracking.client import MlflowClient
from mlflow.tracking.context.abstract_context import RunContextProvider
from mlflow.utils import gorilla
from mlflow.utils.autologging_utils import wrap_patch
from mlflow.utils.autologging_utils import (
wrap_patch,
autologging_is_disabled,
)
from mlflow.spark import FLAVOR_NAME

_JAVA_PACKAGE = "org.mlflow.spark.autologging"
_SPARK_TABLE_INFO_TAG_NAME = "sparkDatasourceInfo"
Expand Down Expand Up @@ -217,6 +221,8 @@ def _notify(self, path, version, data_format):
Method called by Scala SparkListener to propagate datasource read events to the current
Python process
"""
if autologging_is_disabled(FLAVOR_NAME):
return
# If there's an active run, simply set the tag on it
# Note that there's a TOCTOU race condition here - active_run() here can actually throw
# if the main thread happens to end the run & pop from the active run stack after we check
Expand Down Expand Up @@ -248,6 +254,9 @@ def in_context(self):
return True

def tags(self):
# if autologging is disabled, then short circuit `tags()` and return empty dict.
if autologging_is_disabled(FLAVOR_NAME):
return {}
with _lock:
global _table_infos
seen = set()
Expand Down
7 changes: 6 additions & 1 deletion mlflow/gluon.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from distutils.version import LooseVersion
import os

import pandas as pd
Expand Down Expand Up @@ -48,6 +49,7 @@ def load_model(model_uri, ctx):
model = mlflow.gluon.load_model("runs:/" + gluon_random_data_run.info.run_id + "/model")
model(nd.array(np.random.rand(1000, 1, 32)))
"""
import mxnet
from mxnet import gluon
from mxnet import sym

Expand All @@ -58,7 +60,10 @@ def load_model(model_uri, ctx):
symbol = sym.load(model_arch_path)
inputs = sym.var("data", dtype="float32")
net = gluon.SymbolBlock(symbol, inputs)
net.collect_params().load(model_params_path, ctx)
if LooseVersion(mxnet.__version__) >= LooseVersion("2.0.0"):
net.load_parameters(model_params_path, ctx)
else:
net.collect_params().load(model_params_path, ctx)
return net


Expand Down
Loading