diff --git a/.github/workflows/train.yaml b/.github/workflows/train.yaml index bc1f8001..321325b3 100644 --- a/.github/workflows/train.yaml +++ b/.github/workflows/train.yaml @@ -12,7 +12,8 @@ jobs: run: | pip install -r requirements.txt dvc repro - + + # This is needed to compare the differences of the various ML approaches git fetch --prune dvc metrics diff --show-md master > report.md diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 00000000..17e15f27 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + } + ] +} \ No newline at end of file diff --git a/dvc.lock b/dvc.lock index 64c1df08..82ef3799 100644 --- a/dvc.lock +++ b/dvc.lock @@ -1,30 +1,41 @@ -get_data: - cmd: python get_data.py - deps: - - path: get_data.py - md5: 1db5f442403042e0403c75132fe59af4 - outs: - - path: data_raw.csv - md5: a6aec8da63a5fa2619af025a76746f29 -process: - cmd: python process_data.py - deps: - - path: data_raw.csv - md5: a6aec8da63a5fa2619af025a76746f29 - - path: process_data.py - md5: 79b357c12f171f3d07c76780815b651c - outs: - - path: data_processed.csv - md5: 3b20a3a6ac0570f3de28b77d1e88f932 -train: - cmd: python train.py - deps: - - path: data_processed.csv - md5: 3b20a3a6ac0570f3de28b77d1e88f932 - - path: train.py - md5: 80ad33d8caf823fc1d5cdefcb5b9490a - outs: - - path: by_region.png - md5: e7f3818fac35589b0c46dd65f8293e74 - - path: metrics.json - md5: f4844c28505568f336c5f91db3f1beb3 +schema: '2.0' +stages: + get_data: + cmd: python get_data.py + deps: + - path: get_data.py + md5: 1db5f442403042e0403c75132fe59af4 + size: 516 + outs: + - path: data_raw.csv + md5: a6aec8da63a5fa2619af025a76746f29 + size: 68868 + process: + cmd: python process_data.py + deps: + - path: data_raw.csv + md5: a6aec8da63a5fa2619af025a76746f29 + size: 68868 + - path: process_data.py + md5: 79b357c12f171f3d07c76780815b651c + size: 928 + outs: + - path: data_processed.csv + md5: 03269b6c0a766dc80e090326008765b3 + size: 24333 + train: + cmd: python train.py + deps: + - path: data_processed.csv + md5: 03269b6c0a766dc80e090326008765b3 + size: 24333 + - path: train.py + md5: 80ad33d8caf823fc1d5cdefcb5b9490a + size: 1524 + outs: + - path: by_region.png + md5: a789d2d4025a7b0750e17088fe032d1f + size: 7914 + - path: metrics.json + md5: f4844c28505568f336c5f91db3f1beb3 + size: 89