Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
183 commits
Select commit Hold shift + click to select a range
4a23b9a
First commit
mrocklin Nov 18, 2016
2c9e6b9
add travis badge to readme
mrocklin Nov 18, 2016
4954924
fix setup.py
mrocklin Nov 18, 2016
7d940eb
flake8
mrocklin Nov 18, 2016
52505fe
Update travis.yml (#1)
mrocklin Nov 18, 2016
0133780
Update travis.yml (#1)
mrocklin Nov 18, 2016
ebc74b7
Added backtracking line search tests.
cicdw Dec 13, 2016
b53a3c9
Reorganized code into base.py and models.py
cicdw Dec 13, 2016
401ebf4
Changed the algorithm API slightly. Series based tests still can fail.
cicdw Dec 20, 2016
885cde0
Refactored to standalone optimization algorithms for Logistic Regress…
cicdw Jan 10, 2017
ed681ff
Added proximal gradient method.
cicdw Jan 11, 2017
bee0d7a
Added a function for creating some logistic output.
cicdw Jan 11, 2017
5fc17a7
Edited some default settings.
cicdw Jan 12, 2017
f63c37e
Performance tweaks
mrocklin Jan 12, 2017
bfcf708
Separated out line search, used numba for log likelihood.
Jan 13, 2017
dfe55eb
use persist function from dask
mrocklin Jan 22, 2017
4d02925
Add initial implementation of logistic regression with l1 penalty
Jan 18, 2017
04247a8
Add dask bleeding edge version
Jan 25, 2017
611cd21
Add dask bleeding edge version
Jan 25, 2017
f788301
Merge branch 'master' of https://github.com/dask/dask-glm into hussai…
Jan 26, 2017
cb0f41e
Merge branch 'hussainsultan-master'
Jan 26, 2017
989ec80
Add flake8 compatibility
Jan 26, 2017
7950204
Remove `compute` from bfgs
Jan 26, 2017
be8fb6f
Merge pull request #2 from hussainsultan/master
cicdw Jan 26, 2017
b3c9a18
Support absence of Numba
mrocklin Jan 26, 2017
08926d4
optimize newton
mrocklin Jan 26, 2017
a38f5a1
optimize bfgs
mrocklin Jan 26, 2017
695ef81
Consolidated sigmoid functions.
Jan 26, 2017
53b8a2f
Removed duplicated gradient.py file and made sure old test still passes.
Jan 26, 2017
1f1bd90
squeeze y
mrocklin Jan 26, 2017
da4fb1a
Added LFBGS convergence checks and a high-level test for unregularize…
Jan 26, 2017
9bd1270
Added LFBGS convergence checks and a high-level test for unregularize…
Jan 26, 2017
488964b
Added high-level test for unregularized optimizers.
Jan 26, 2017
2edf995
Merge pull request #10 from mrocklin/dev
cicdw Jan 26, 2017
f873e65
flake8
mrocklin Jan 26, 2017
0107edc
Merged in upstream changes.
Jan 27, 2017
cbc45e7
Tried to get BFGS to pass tests, but still failing.
Jan 27, 2017
daded67
Clean up ADMM / BFGS tests.
cicdw Jan 27, 2017
2640a78
Merge pull request #15 from moody-marlin/admm
hussainsultan Jan 27, 2017
62a63e0
Remove files
Jan 27, 2017
c1e4bcf
Merge pull request #18 from hussainsultan/master
hussainsultan Jan 27, 2017
4a53a02
Add basic convergence tests (#20)
mrocklin Jan 30, 2017
bf54d48
Use persist in proximal grad (#21)
mrocklin Jan 31, 2017
ea60598
ADMM (#22)
cicdw Feb 1, 2017
78b782c
test correctness of admm (#24)
mrocklin Feb 2, 2017
74ba5e7
Added notes on sigmoid approximation.
inati Feb 5, 2017
477be3f
Parallelize stepsize computation (#25)
mrocklin Feb 13, 2017
9065460
Merge pull request #27 from inati/master
hussainsultan Feb 13, 2017
496ac79
Abstract away function / gradient calls in algorithms
cicdw Feb 18, 2017
a2cf89a
Update tests to run with refactor; fix newton
cicdw Feb 18, 2017
793310f
Add normal model functions (untested).
cicdw Feb 18, 2017
3e0ee95
Fix ADMM overwrite; add default args to local_update
cicdw Feb 20, 2017
ba5879c
Add notebook overviewing optimality concerns.
cicdw Feb 20, 2017
e721117
Remove verbosity from proximal_grad
cicdw Feb 20, 2017
50e86a9
Refactor to staticmethod classes holding each GLM family
cicdw Feb 21, 2017
326a8a9
Add coverage reports and config file for pytest-cov
cicdw Feb 21, 2017
9b8029d
Removed numba.jit for now
cicdw Feb 21, 2017
d4e8f73
Update algorithms to take in class rather than individual functions
cicdw Feb 21, 2017
2b24b19
Resolve merge conflicts
cicdw Feb 21, 2017
c8303a5
Update to reflect step-size changes that were overwritten and clean-up.
cicdw Feb 21, 2017
bc883aa
fix bfgs to return beta if stepsize is 0
cicdw Feb 21, 2017
2d2f75f
xfail any bfgs tests.
cicdw Feb 22, 2017
cd2ba1a
Add unregularized test for crude optimality
cicdw Feb 22, 2017
84c53ff
Add regularized tests, allow for families in proximal_grad
cicdw Feb 22, 2017
666bfab
Merge pull request #28 from moody-marlin/abstract_algos
cicdw Feb 22, 2017
422107d
Add regularizer classes for l1/l2.
cicdw Feb 22, 2017
98eacf1
Adjust proximal_grad to handle reg class; tests passing.
cicdw Feb 22, 2017
80643b4
flaked
cicdw Feb 23, 2017
faaed1e
Rename local admm functions; adjust admm tests to include Normal family.
cicdw Feb 23, 2017
75a0d86
Add conda environment .yml file.
cicdw Feb 23, 2017
8ca2bc8
Merge pull request #29 from moody-marlin/regularizer_class
cicdw Feb 23, 2017
31cae5b
Rename dask_glm.yml to environment.yml
cicdw Feb 23, 2017
ba18df4
Add test to ensure determinism
mrocklin Mar 21, 2017
d70731d
add distributed test
mrocklin Mar 21, 2017
2518af6
Change max_iter -> max_steps
mrocklin Mar 21, 2017
fd2399b
flake8
mrocklin Mar 21, 2017
e720572
Merge pull request #36 from mrocklin/deterministic
cicdw Mar 21, 2017
d41308d
Update scaling in admm convergence check; fix scipy calls. (#37)
cicdw Mar 22, 2017
b423c73
Relax requirements
mrocklin Apr 10, 2017
88854df
Merge pull request #39 from mrocklin/requirements
cicdw Apr 10, 2017
fe7b875
API: Implement scikit-learn compat API
TomAugspurger Apr 12, 2017
670d554
Merge pull request #40 from TomAugspurger/api
cicdw Apr 23, 2017
d8c6251
Support sparse arrays (#42)
mrocklin Apr 27, 2017
47c6baf
Update Logistic loglike to prevent overflow.
cicdw Apr 28, 2017
150bd7f
Spike out standardize decorator.
Apr 30, 2017
11c3ce5
Spike out normalize decorator with tests.
Apr 30, 2017
b2bbc73
Decorate the halls; distributed test fails.
Apr 30, 2017
5fa9b7a
Remove all prints; should be handled via warnings.
Apr 30, 2017
3f58be5
Add input scaling for non-intercept fits.
May 2, 2017
b0d1eed
Fix distributed determinism test with copy.
May 2, 2017
f413fed
Increase lambda to decrease test failures.
May 2, 2017
e79d642
Flaked
May 2, 2017
b251bfc
Add Poisson regression support (WIP). (#46)
mpancia May 3, 2017
b2b6f10
Elastic Net Regularizer (#49)
postelrich May 4, 2017
dfdb6aa
Merge upstream master.
May 5, 2017
ffaadb5
Decorator injects normalize kwarg.
May 5, 2017
0cff71c
Normalize raises if multiple constants detected.
May 5, 2017
192a647
Add comment on copy
May 5, 2017
d39d15f
Remove newton doc string for now
May 5, 2017
3833542
flaked
May 5, 2017
d5dd10e
L-BFGS solver based on scipy.optimize with L2 regularization (#50)
MLnick May 9, 2017
2600b7c
Merge in lbfgs.
May 10, 2017
e5733af
Remove level of depth from normalize decorator
May 10, 2017
8127186
Merge pull request #44 from moody-marlin/logistic-overflow
TomAugspurger May 10, 2017
2865eb1
DOC: Add module documentation
TomAugspurger May 2, 2017
91cbda4
BUG: Accept **kwargs in rest of algorithms
TomAugspurger May 10, 2017
ef0360a
Merge pull request #51 from TomAugspurger/docs
TomAugspurger May 11, 2017
d54fc61
DOC: Add extra requirements
TomAugspurger May 11, 2017
aae15bd
DOC: Add links to readthedocs
TomAugspurger May 16, 2017
0e9d6cc
Use setuptools scm
TomAugspurger May 22, 2017
d4312a0
Merge pull request #56 from TomAugspurger/setuptools-scm
TomAugspurger May 23, 2017
581cf9f
FIX : broken notebook + travis
agramfort Jul 13, 2017
46ca8f9
fix travis
agramfort Jul 13, 2017
c54ff85
fix travis
agramfort Jul 13, 2017
69c7e1b
copy
agramfort Jul 18, 2017
d2fdba2
DOC: Run all notebooks on RTD
TomAugspurger Jul 22, 2017
e844098
DOC: Bump timeout
TomAugspurger Jul 22, 2017
237828f
DOC: Math and fixed headings
TomAugspurger Jul 22, 2017
2a79dfa
Merge pull request #58 from agramfort/fix_notebook
TomAugspurger Jul 26, 2017
90de97c
CLN: Various cleanups in prep for a release today
TomAugspurger Oct 2, 2017
45156f1
Merge pull request #61 from TomAugspurger/release-prep
TomAugspurger Oct 2, 2017
6930c12
RLS: 0.1.0
TomAugspurger Oct 2, 2017
64e01eb
Update and normalize docstrings (#62)
mrocklin Oct 12, 2017
86a220b
Update to use dask.config.set and scheduler keyword (#72)
jrbourbeau Oct 24, 2018
d9bd394
RLS: 0.2.0
TomAugspurger Oct 24, 2018
8ae6a96
Fix some documentation typos (#71)
zdgriffith Nov 16, 2018
6f7f154
Add n_iter_ attribute to estimators
pentschev Mar 20, 2019
e2d7e19
Fix tests, include tests to check number of iterations
pentschev Mar 20, 2019
1664bee
Fix flake8 error on Python 2.7
pentschev Mar 20, 2019
34766b2
Add missing n_iter_ estimators docstring
pentschev Mar 22, 2019
83af95f
Fix newton number of iterations computation
pentschev Mar 25, 2019
0c51415
Scatter lbfgs current weights to workers
jdlesage Sep 10, 2019
6f9ecf5
Remove python2.7 tests in travis
jdlesage Sep 11, 2019
d6213b9
Merge remote-tracking branch 'jdlesage/remove_python2' into HEAD
jdlesage Sep 11, 2019
ef2b463
Merge pull request #81 from jdlesage/remove_python2
TomAugspurger Sep 11, 2019
251f9ac
Use current dask client instead of inject it
jdlesage Sep 13, 2019
31491e4
Fix huge typo in the array to send
jdlesage Sep 16, 2019
0e840cb
Change import of dask distributed
jdlesage Sep 16, 2019
5bfcb06
Merge pull request #80 from jdlesage/broadcast_weights_lbfgs
TomAugspurger Sep 16, 2019
62d61ee
Support sparse matrix
jdlesage Sep 18, 2019
a91ee8c
Add sparse to the lib to install by travis
jdlesage Sep 18, 2019
0ae6b6b
Use _meta to detect an array is sparse.
jdlesage Sep 20, 2019
eaf241e
Force sparse version >= 0.7.0
jdlesage Sep 23, 2019
a06b67e
Force sparse version also when creating conda env
jdlesage Sep 23, 2019
c42c832
Update environment.yml
jdlesage Sep 24, 2019
989bf56
list installed packages
TomAugspurger Sep 24, 2019
e8418ea
Update version of numpy
jdlesage Sep 24, 2019
490cc96
Test using python3.7 as numpy version is too old on python3.5
jdlesage Sep 25, 2019
c4a9bbc
utils is now private in sparse
jdlesage Sep 25, 2019
af0c1f7
Use public sparse utils method
jdlesage Sep 26, 2019
cf739de
Use base class SparseArray in dispatch
jdlesage Sep 27, 2019
34122dc
Add a unit test for DOK matrix as xfail
jdlesage Sep 27, 2019
64b4ff9
Fix linting
jdlesage Sep 27, 2019
db949cb
Merge pull request #82 from jdlesage/sparse_matrix
TomAugspurger Sep 30, 2019
5c15522
Merge dask-glm
TomAugspurger Oct 16, 2019
81fa118
moves
TomAugspurger Oct 16, 2019
622e058
moves
TomAugspurger Oct 16, 2019
b762dd4
moves
TomAugspurger Oct 16, 2019
7c9d804
fixups
TomAugspurger Oct 16, 2019
d9f3b6c
tests
TomAugspurger Oct 16, 2019
9ee9e43
fixups
TomAugspurger Oct 16, 2019
a7f94e9
fixups
TomAugspurger Oct 16, 2019
56b0ee7
fixups
TomAugspurger Oct 16, 2019
0f33e93
speedup poisson test
TomAugspurger Oct 16, 2019
f6cd65a
fixups
TomAugspurger Oct 17, 2019
7044353
Merge remote-tracking branch 'dask-glm-pentschev/n_iter-attribute' in…
TomAugspurger Oct 17, 2019
9714251
remove coverage
TomAugspurger Oct 17, 2019
6059f3c
remove configs
TomAugspurger Oct 17, 2019
84d32ab
remove dead directory
TomAugspurger Oct 17, 2019
42678e3
remove conf
TomAugspurger Oct 17, 2019
537d709
remove duplicated examples
TomAugspurger Oct 17, 2019
7621b55
remove index
TomAugspurger Oct 17, 2019
c6aa8e0
Merge remote-tracking branch 'upstream/master' into merge-dask-glm
TomAugspurger Oct 17, 2019
c5c2d23
fixup env
TomAugspurger Oct 17, 2019
c319c37
bump
TomAugspurger Oct 17, 2019
dedfe87
fixed merge conflict
TomAugspurger Oct 17, 2019
d8bdac9
handle n_iter
TomAugspurger Oct 17, 2019
fb327a3
bump for array_function
TomAugspurger Oct 17, 2019
e0a92a4
fixups
TomAugspurger Oct 17, 2019
5d476c0
Merge remote-tracking branch 'upstream/master' into merge-dask-glm
TomAugspurger Jun 23, 2020
0ab03bf
revert GLM changes
TomAugspurger Jun 24, 2020
cf2a075
maybe fixups
TomAugspurger Jun 24, 2020
57123d8
skip slow tests
TomAugspurger Jun 24, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -122,3 +122,4 @@ docs/source/auto_examples/
docs/source/examples/mydask.png

dask-worker-space
.coverage
1 change: 0 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,3 @@ repos:
rev: v4.3.21
hooks:
- id: isort

2 changes: 1 addition & 1 deletion LICENSE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
THE POSSIBILITY OF SUCH DAMAGE.
2 changes: 1 addition & 1 deletion ci/environment-3.7.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ dependencies:
- multipledispatch >=0.4.9
- mypy
- numba
- numpy >=1.16.3
- numpy >=1.17.0
- numpydoc
- packaging
- pandas
Expand Down
1 change: 1 addition & 0 deletions ci/environment-docs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ dependencies:
- tornado
- toolz
- xgboost
- dask-xgboost
- zict
- pip
- dask
Expand Down
11 changes: 11 additions & 0 deletions dask_ml/_compat.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import contextlib
import importlib
import os
from collections.abc import Mapping # noqa
from typing import Any, List, Optional, Union
Expand All @@ -19,6 +20,7 @@
SK_024 = SK_VERSION >= packaging.version.parse("0.24.0.dev0")
DASK_240 = DASK_VERSION >= packaging.version.parse("2.4.0")
DASK_2130 = DASK_VERSION >= packaging.version.parse("2.13.0")
DASK_2200 = DASK_VERSION > packaging.version.parse("2.19.0") # TODO: update to >=
DISTRIBUTED_2_5_0 = DISTRIBUTED_VERSION > packaging.version.parse("2.5.0")
DISTRIBUTED_2_11_0 = DISTRIBUTED_VERSION > packaging.version.parse("2.10.0") # dev
WINDOWS = os.name == "nt"
Expand All @@ -40,6 +42,15 @@ def check_is_fitted(est, attributes: Optional[Union[str, List[str]]] = None):
return sklearn.utils.validation.check_is_fitted(est, *args)


def _import_sparse():
try:
return importlib.import_module("sparse")
except ImportError:
raise ImportError(
"This requires the optional 'sparse' library. Please install 'sparse'."
)


def _check_multimetric_scoring(estimator, scoring=None):
from sklearn.metrics._scorer import _check_multimetric_scoring

Expand Down
8 changes: 8 additions & 0 deletions dask_ml/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@
from sklearn.base import BaseEstimator


def is_sparse(x):
    """Return ``True`` when *x* is a pydata/sparse ``SparseArray``.

    Safely returns ``False`` if the optional ``sparse`` package is not
    installed, so callers need not guard the import themselves.
    """
    try:
        import sparse
    except ImportError:
        return False
    return isinstance(x, sparse.SparseArray)


def copy_learned_attributes(from_estimator, to_estimator):
attrs = {k: v for k, v in vars(from_estimator).items() if k.endswith("_")}

Expand Down
21 changes: 21 additions & 0 deletions dask_ml/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

import dask_ml.utils

from . import _compat


def _check_axis_partitioning(chunks, n_features):
c = chunks[1][0]
Expand All @@ -30,6 +32,7 @@ def make_counts(
scale=1.0,
chunks=100,
random_state=None,
is_sparse=False,
):
"""
Generate a dummy dataset for modeling count data.
Expand Down Expand Up @@ -72,6 +75,11 @@ def make_counts(
z0 = X[:, informative_idx].dot(beta[informative_idx])
rate = da.exp(z0)
y = rng.poisson(rate, size=1, chunks=(chunks,))

if is_sparse:
sparse = _compat._import_sparse()
X = X.map_blocks(sparse.COO)

return X, y


Expand Down Expand Up @@ -218,6 +226,7 @@ def make_regression(
coef=False,
random_state=None,
chunks=None,
is_sparse=False,
):
"""
Generate a random regression problem.
Expand Down Expand Up @@ -334,6 +343,10 @@ def make_regression(

y_big = y_big.squeeze()

if is_sparse:
sparse = _compat._import_sparse()
X_big = X_big.map_blocks(sparse.COO)

if return_coef:
return X_big, y_big, coef
else:
Expand All @@ -357,6 +370,7 @@ def make_classification(
shuffle=True,
random_state=None,
chunks=None,
is_sparse=False,
):
chunks = da.core.normalize_chunks(chunks, (n_samples, n_features))
_check_axis_partitioning(chunks, n_features)
Expand All @@ -378,9 +392,16 @@ def make_classification(
y = rng.random(z0.shape, chunks=chunks[0]) < 1 / (1 + da.exp(-z0))
y = y.astype(int)

if is_sparse:
sparse = _compat._import_sparse()
X = X.map_blocks(sparse.COO)

return X, y


make_poisson = make_counts


def random_date(start, end):
delta = end - start
int_delta = (delta.days * 24 * 60 * 60) + delta.seconds
Expand Down
Loading