From ee43033e5750d0b0d627707441e4716314108f43 Mon Sep 17 00:00:00 2001 From: Thilo von Neumann Date: Tue, 23 May 2023 11:17:43 +0200 Subject: [PATCH 1/3] Treat SparseArray as an array in collate_fn --- padertorch/data/utils.py | 48 ++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/padertorch/data/utils.py b/padertorch/data/utils.py index 08a39bf5..4d633110 100644 --- a/padertorch/data/utils.py +++ b/padertorch/data/utils.py @@ -21,11 +21,11 @@ def pad_tensor(vec, pad, axis): def collate_fn(batch): """Moves list inside of dict/dataclass recursively. - Can be used as map after batching of an dataset: + Can be used as map after batching of a dataset: `dataset.batch(...).map(collate_fn)` Args: - batch: list of examples + batch: list or tuple of examples Returns: @@ -40,30 +40,44 @@ def collate_fn(batch): {'a': {'b': [[1, 2], [3, 4]]}} >>> import dataclasses - >>> Point = dataclasses.make_dataclass('Point', ['x', 'y']) - >>> batch = [Point(1, 2), Point(3, 4)] + >>> Data = dataclasses.make_dataclass('Data', ['x', 'y']) + >>> batch = [Data(1, 2), Data(3, 4)] >>> batch - [Point(x=1, y=2), Point(x=3, y=4)] + [Data(x=1, y=2), Data(x=3, y=4)] >>> collate_fn(batch) - Point(x=[1, 3], y=[2, 4]) + Data(x=[1, 3], y=[2, 4]) >>> collate_fn(tuple(batch)) - Point(x=(1, 3), y=(2, 4)) + Data(x=(1, 3), y=(2, 4)) + + >>> from paderbox.array.sparse import zeros + >>> batch = [zeros(10), zeros(20)] + >>> collate_fn(batch) + [SparseArray(shape=(10,)), SparseArray(shape=(20,))] + >>> batch = [Data(zeros(1), zeros(1)), Data(zeros(1), zeros(1))] + >>> collate_fn(batch) + Data(x=[SparseArray(shape=(1,)), SparseArray(shape=(1,))], y=[SparseArray(shape=(1,)), SparseArray(shape=(1,))]) """ assert isinstance(batch, (tuple, list)), (type(batch), batch) - if isinstance(batch[0], dict): + e = batch[0] + + if isinstance(e, dict): for b in batch[1:]: - assert batch[0].keys() == b.keys(), batch - return batch[0].__class__({ - k: (collate_fn(batch.__class__([b[k] for b in batch]))) - for k in batch[0] + assert b.keys() == e.keys(), batch + return e.__class__({ + k: collate_fn(batch.__class__([b[k] for b in batch])) + for k in e }) - elif hasattr(batch[0], '__dataclass_fields__'): + elif ( + hasattr(e, '__dataclass_fields__') + # Specifically ignore SparseArray, which is a dataclass but should be treated as an array here + and f'{e.__class__.__module__}.{e.__class__.__qualname__}' != 'paderbox.array.sparse.SparseArray' + ): for b in batch[1:]: - assert batch[0].__dataclass_fields__ == b.__dataclass_fields__, batch - return batch[0].__class__(**{ - k: (collate_fn(batch.__class__([getattr(b, k) for b in batch]))) - for k in batch[0].__dataclass_fields__ + assert b.__dataclass_fields__ == e.__dataclass_fields__, batch + return e.__class__(**{ + k: collate_fn(batch.__class__([getattr(b, k) for b in batch])) + for k in e.__dataclass_fields__ }) else: return batch From 9f24a8d8b3f5ef34095b83dd3aea2d97040ce5ba Mon Sep 17 00:00:00 2001 From: Thilo von Neumann Date: Tue, 23 May 2023 12:28:45 +0200 Subject: [PATCH 2/3] Install wheel in tests --- .github/workflows/tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f1d60ed5..c386f386 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -29,6 +29,8 @@ jobs: pip install flake8 pytest pytest-cov codecov if [ -f requirements.txt ]; then pip install -r requirements.txt; fi pip install numpy scipy Cython + # https://github.com/pypa/pip/issues/12030#issuecomment-1546344047 + python -m pip install wheel pip install --editable .[all] - name: Lint with flake8 run: | From 95ea8f8e528735a57327f51bfef944799f1e9393 Mon Sep 17 00:00:00 2001 From: Thilo von Neumann Date: Tue, 23 May 2023 12:33:28 +0200 Subject: [PATCH 3/3] Remove azure-pipelines.yml --- azure-pipelines.yml | 61 --------------------------------------------- 1 file changed, 61 deletions(-) delete mode 100644 azure-pipelines.yml diff --git a/azure-pipelines.yml b/azure-pipelines.yml deleted file mode 100644 index ddabb917..00000000 --- a/azure-pipelines.yml +++ /dev/null @@ -1,61 +0,0 @@ -# Python package -# Create and test a Python package on multiple Python versions. -# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: -# https://docs.microsoft.com/azure/devops/pipelines/languages/python -# -# See https://docs.microsoft.com/en-us/azure/devops/pipelines/agents/hosted?view=azure-devops -# for vmImage. e.g. ubuntu-18.04, ubuntu-latest, ... -# Note: ubuntu-latest is not ubuntu-18.04. But I do not know why. - -trigger: -- master - -strategy: - matrix: - Python37: - IMAGE_NAME: 'ubuntu-18.04' - python.version: '3.7' - Python38: - IMAGE_NAME: 'ubuntu-18.04' - python.version: '3.8' -pool: - vmImage: $(IMAGE_NAME) - -steps: -- task: UsePythonVersion@0 - inputs: - versionSpec: '$(python.version)' - displayName: 'Use Python $(python.version)' - -- script: | - # Print cmd before executing - trap 'echo -e "$ $BASH_COMMAND"' DEBUG - lsb_release -a - python --version - displayName: 'Show versions' -- script: | - sudo apt-get install libsndfile1 - python -m pip install --upgrade pip - # pip install -r requirements.txt - pip install numpy scipy Cython - pip install --editable .[test] - displayName: 'Install dependencies' - -- script: | - pip install pytest pytest-azurepipelines - pip install pytest-cov - python -m coverage xml --include="padertorch*" - pytest -v "tests/" "padertorch/" - displayName: 'pytest' - -- task: PublishTestResults@2 - condition: succeededOrFailed() - inputs: - testResultsFiles: '**/test-*.xml' - testRunTitle: 'Publish test results for Python $(python.version)' - -- task: PublishCodeCoverageResults@1 - inputs: - codeCoverageTool: Cobertura - summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml' - reportDirectory: '$(System.DefaultWorkingDirectory)/**/htmlcov' \ No newline at end of file