From b13019491d0f91a9b932fde12955434c21ca63b9 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 20 Oct 2023 12:15:05 -0400 Subject: [PATCH 01/58] Audiocraft CLI Directory & ReadME --- extensions/labgraph_audiogen/README.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 extensions/labgraph_audiogen/README.md diff --git a/extensions/labgraph_audiogen/README.md b/extensions/labgraph_audiogen/README.md new file mode 100644 index 00000000..b287e7db --- /dev/null +++ b/extensions/labgraph_audiogen/README.md @@ -0,0 +1 @@ +# Audiocraft CLI ReadME \ No newline at end of file From fa305d454742ff68982f7296ac1d2ba48460b223 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 20 Oct 2023 12:24:09 -0400 Subject: [PATCH 02/58] initial files --- extensions/labgraph_audiogen/labgraph_audiogen/__init__.py | 0 extensions/labgraph_audiogen/labgraph_audiogen/main.py | 0 extensions/labgraph_audiogen/setup.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 extensions/labgraph_audiogen/labgraph_audiogen/__init__.py create mode 100644 extensions/labgraph_audiogen/labgraph_audiogen/main.py create mode 100644 extensions/labgraph_audiogen/setup.py diff --git a/extensions/labgraph_audiogen/labgraph_audiogen/__init__.py b/extensions/labgraph_audiogen/labgraph_audiogen/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/extensions/labgraph_audiogen/labgraph_audiogen/main.py b/extensions/labgraph_audiogen/labgraph_audiogen/main.py new file mode 100644 index 00000000..e69de29b diff --git a/extensions/labgraph_audiogen/setup.py b/extensions/labgraph_audiogen/setup.py new file mode 100644 index 00000000..e69de29b From 3239fb8ec05b99866e2c2cae8f5ed79f9d9234aa Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 20 Oct 2023 12:40:01 -0400 Subject: [PATCH 03/58] Adds package setup & main driver code --- .../labgraph_audiogen/labgraph_audiogen/main.py | 7 +++++++ extensions/labgraph_audiogen/setup.py | 15 +++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/extensions/labgraph_audiogen/labgraph_audiogen/main.py b/extensions/labgraph_audiogen/labgraph_audiogen/main.py index e69de29b..63a4b8a0 100644 --- a/extensions/labgraph_audiogen/labgraph_audiogen/main.py +++ b/extensions/labgraph_audiogen/labgraph_audiogen/main.py @@ -0,0 +1,7 @@ +import sys + +def main(args=None): + if args is None: + args = sys.argv[1:] + + print(f"Hello World from labgraph_audiogen with args: {' '.join(args)}") \ No newline at end of file diff --git a/extensions/labgraph_audiogen/setup.py b/extensions/labgraph_audiogen/setup.py index e69de29b..546f39d9 100644 --- a/extensions/labgraph_audiogen/setup.py +++ b/extensions/labgraph_audiogen/setup.py @@ -0,0 +1,15 @@ +from setuptools import setup + +setup( + name='labgraph_audiogen', + version='0.1', + description="Audio generation on labgraph", + packages=['labgraph_audiogen'], + install_requires=[ + 'Click', + ], + entry_points=''' + [console_scripts] + labgraph_audiogen=labgraph_audiogen.main:main + ''', +) \ No newline at end of file From 3abca9933373a11b3985042ca53321bd3768f6a5 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 20 Oct 2023 12:46:23 -0400 Subject: [PATCH 04/58] Adds initial testing framework pytest --- extensions/labgraph_audiogen/tests/test_main.py | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 extensions/labgraph_audiogen/tests/test_main.py diff --git a/extensions/labgraph_audiogen/tests/test_main.py b/extensions/labgraph_audiogen/tests/test_main.py new file mode 100644 index 00000000..2cbd1c8e --- /dev/null +++ b/extensions/labgraph_audiogen/tests/test_main.py @@ -0,0 +1,5 @@ +def test_main(): + import subprocess + process = subprocess.run(["labgraph_audiogen", "arg1", "arg2"], + capture_output=True, text=True) + assert process.stdout.strip() == "Hello World from labgraph_audiogen with args: arg1 arg2" \ No newline at end of file From c8d68760827fc4d127adc7a358168672e700d0f6 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 20 Oct 2023 13:09:46 -0400 Subject: [PATCH 05/58] Test initial workflow for AudioGen --- .github/workflows/labgraph_audiogen.yml | 28 +++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 .github/workflows/labgraph_audiogen.yml diff --git a/.github/workflows/labgraph_audiogen.yml b/.github/workflows/labgraph_audiogen.yml new file mode 100644 index 00000000..b133d7ac --- /dev/null +++ b/.github/workflows/labgraph_audiogen.yml @@ -0,0 +1,28 @@ +name: AudioGen Tests + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + + - name: Install dependencies + run: | + cd extensions/labgraph_audiogen + python -m pip install --upgrade pip + pip install -e . + pip install pytest + + - name: Run tests + run: | + cd extensions/labgraph_audiogen + pytest \ No newline at end of file From 5a7e045e094822092277a28a4928ef6cc07e1119 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Thu, 26 Oct 2023 22:35:25 -0400 Subject: [PATCH 06/58] improved setup + added requirements --- extensions/labgraph_audiogen/setup.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/extensions/labgraph_audiogen/setup.py b/extensions/labgraph_audiogen/setup.py index 546f39d9..35f3a842 100644 --- a/extensions/labgraph_audiogen/setup.py +++ b/extensions/labgraph_audiogen/setup.py @@ -1,12 +1,14 @@ -from setuptools import setup +from setuptools import setup, find_packages setup( name='labgraph_audiogen', version='0.1', description="Audio generation on labgraph", - packages=['labgraph_audiogen'], + packages=find_packages(), install_requires=[ 'Click', + "torchaudio", + "audiocraft", ], entry_points=''' [console_scripts] From d9b6c4853f99089a13f2623f1461c5b89702430c Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Thu, 26 Oct 2023 22:36:34 -0400 Subject: [PATCH 07/58] Implements --description & --duration for audiogen --- .../labgraph_audiogen/main.py | 29 +++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/extensions/labgraph_audiogen/labgraph_audiogen/main.py b/extensions/labgraph_audiogen/labgraph_audiogen/main.py index 63a4b8a0..bc902e83 100644 --- a/extensions/labgraph_audiogen/labgraph_audiogen/main.py +++ b/extensions/labgraph_audiogen/labgraph_audiogen/main.py @@ -1,7 +1,32 @@ import sys +import argparse +import torchaudio +from audiocraft.models import AudioGen +from audiocraft.data.audio import audio_write def main(args=None): + # Parse arguments + parser = argparse.ArgumentParser(description='Generate audio from descriptions using Audiocraft\'s AudioGen.') + parser.add_argument('--description', nargs='+', type=str, help='Description of the generated audio.') + parser.add_argument('--duration', type=int, default=5, help='Duration of the generated audio.') if args is None: - args = sys.argv[1:] + args = parser.parse_args() + else: + args = parser.parse_args(args) - print(f"Hello World from labgraph_audiogen with args: {' '.join(args)}") \ No newline at end of file + print(f"Running labgraph_audiogen with description: {args.description}") + + # Load Audiocraft's AudioGen model and set generation params. + model = AudioGen.get_pretrained('facebook/audiogen-medium') + model.set_generation_params(duration=args.duration) + + # Generate audio from the description + wav = model.generate(args.description) + + # Save the generated audios. + for idx, one_wav in enumerate(wav): + # Will save under {idx}.wav, with loudness normalization at -14 db LUFS. + audio_write(f'{idx}', one_wav.cpu(), model.sample_rate, strategy="loudness", loudness_compressor=True) + +if __name__ == "__main__": + main() \ No newline at end of file From a6eeb30a5a0e1de6d7545b10c8c050be3b6cd607 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Thu, 26 Oct 2023 22:48:52 -0400 Subject: [PATCH 08/58] tests the creation of the audio file with desc --- extensions/labgraph_audiogen/tests/test_main.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/extensions/labgraph_audiogen/tests/test_main.py b/extensions/labgraph_audiogen/tests/test_main.py index 2cbd1c8e..f751f270 100644 --- a/extensions/labgraph_audiogen/tests/test_main.py +++ b/extensions/labgraph_audiogen/tests/test_main.py @@ -1,5 +1,13 @@ +import os +import subprocess + def test_main(): - import subprocess - process = subprocess.run(["labgraph_audiogen", "arg1", "arg2"], - capture_output=True, text=True) - assert process.stdout.strip() == "Hello World from labgraph_audiogen with args: arg1 arg2" \ No newline at end of file + # Run the script with an example description + process = subprocess.run(["labgraph_audiogen", "--description", "dog barking"], capture_output=True, text=True) + + # Assert that the script ran successfully + assert process.returncode == 0, f"Script returned {process.returncode}, expected 0. stdout: {process.stdout}, stderr: {process.stderr}" + + # Assert that the output file was created + assert os.path.exists("0.wav"), "Output file 0.wav was not created" + os.remove("0.wav") \ No newline at end of file From 091c0a4d77cde7a9127fa55c7790b06ea0773a86 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Thu, 26 Oct 2023 22:58:22 -0400 Subject: [PATCH 09/58] test workflow with torch install before audiocraft --- .github/workflows/labgraph_audiogen.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/labgraph_audiogen.yml b/.github/workflows/labgraph_audiogen.yml index b133d7ac..916f23f2 100644 --- a/.github/workflows/labgraph_audiogen.yml +++ b/.github/workflows/labgraph_audiogen.yml @@ -19,6 +19,7 @@ jobs: run: | cd extensions/labgraph_audiogen python -m pip install --upgrade pip + pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 pip install -e . pip install pytest From c314e4f57fcbdce2f0cd001113e1c3489c6e9940 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Thu, 26 Oct 2023 23:05:24 -0400 Subject: [PATCH 10/58] test workflow with different distribution of torch --- .github/workflows/labgraph_audiogen.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/labgraph_audiogen.yml b/.github/workflows/labgraph_audiogen.yml index 916f23f2..40200f6a 100644 --- a/.github/workflows/labgraph_audiogen.yml +++ b/.github/workflows/labgraph_audiogen.yml @@ -19,7 +19,8 @@ jobs: run: | cd extensions/labgraph_audiogen python -m pip install --upgrade pip - pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 + pip install https://download.pytorch.org/whl/cu118/torch-2.0.1%2Bcu118-cp310-cp310-linux_x86_64.whl\#sha256\=a7a49d459bf4862f64f7bc1a68beccf8881c2fa9f3e0569608e16ba6f85ebf7b + pip install https://download.pytorch.org/whl/cu118/torchaudio-2.0.2%2Bcu118-cp310-cp310-linux_x86_64.whl\#sha256\=26692645ea061a005c57ec581a2d0425210ac6ba9f923edf11cc9b0ef3a111e9 pip install -e . pip install pytest From 6a0ce7362667eecba1934167324e6496cfc2045a Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Thu, 26 Oct 2023 23:08:18 -0400 Subject: [PATCH 11/58] [workflow] - try raw torch, vision, audio --- .github/workflows/labgraph_audiogen.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/labgraph_audiogen.yml b/.github/workflows/labgraph_audiogen.yml index 40200f6a..350862de 100644 --- a/.github/workflows/labgraph_audiogen.yml +++ b/.github/workflows/labgraph_audiogen.yml @@ -19,8 +19,7 @@ jobs: run: | cd extensions/labgraph_audiogen python -m pip install --upgrade pip - pip install https://download.pytorch.org/whl/cu118/torch-2.0.1%2Bcu118-cp310-cp310-linux_x86_64.whl\#sha256\=a7a49d459bf4862f64f7bc1a68beccf8881c2fa9f3e0569608e16ba6f85ebf7b - pip install https://download.pytorch.org/whl/cu118/torchaudio-2.0.2%2Bcu118-cp310-cp310-linux_x86_64.whl\#sha256\=26692645ea061a005c57ec581a2d0425210ac6ba9f923edf11cc9b0ef3a111e9 + pip install torch torchvision torchaudio pip install -e . pip install pytest From 3c32fdab13dda7686c1420f32c65c2256352fa23 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Thu, 26 Oct 2023 23:13:41 -0400 Subject: [PATCH 12/58] [workflow] - Try downgrading Python --- .github/workflows/labgraph_audiogen.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/labgraph_audiogen.yml b/.github/workflows/labgraph_audiogen.yml index 350862de..001d337c 100644 --- a/.github/workflows/labgraph_audiogen.yml +++ b/.github/workflows/labgraph_audiogen.yml @@ -13,7 +13,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v2 with: - python-version: '3.x' + python-version: '3.10' - name: Install dependencies run: | From dc2419cf99e7f16db18f9ce8ff404a964228a8d5 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Thu, 26 Oct 2023 23:39:31 -0400 Subject: [PATCH 13/58] [workflow] - downgrade to match audiocraft + index --- .github/workflows/labgraph_audiogen.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/labgraph_audiogen.yml b/.github/workflows/labgraph_audiogen.yml index 001d337c..d48a8aed 100644 --- a/.github/workflows/labgraph_audiogen.yml +++ b/.github/workflows/labgraph_audiogen.yml @@ -13,17 +13,18 @@ jobs: - name: Setup Python uses: actions/setup-python@v2 with: - python-version: '3.10' + python-version: '3.8' - name: Install dependencies run: | cd extensions/labgraph_audiogen python -m pip install --upgrade pip - pip install torch torchvision torchaudio + pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu + pip install --pre xformers pip install -e . pip install pytest - name: Run tests run: | cd extensions/labgraph_audiogen - pytest \ No newline at end of file + pytest --verbose \ No newline at end of file From 15bf85e5468baed92dd9bb9aaf8a6ff0151e1c7c Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Thu, 26 Oct 2023 23:47:30 -0400 Subject: [PATCH 14/58] [Workflow] adds triple verbose to pytest --- .github/workflows/labgraph_audiogen.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/labgraph_audiogen.yml b/.github/workflows/labgraph_audiogen.yml index d48a8aed..aae373ba 100644 --- a/.github/workflows/labgraph_audiogen.yml +++ b/.github/workflows/labgraph_audiogen.yml @@ -27,4 +27,4 @@ jobs: - name: Run tests run: | cd extensions/labgraph_audiogen - pytest --verbose \ No newline at end of file + pytest -vvv \ No newline at end of file From 3e90fce510489586e186195471623c1f3e827773 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 27 Oct 2023 00:11:10 -0400 Subject: [PATCH 15/58] tries self-hosted runner on Google Colab --- .github/workflows/labgraph_audiogen.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/labgraph_audiogen.yml b/.github/workflows/labgraph_audiogen.yml index aae373ba..c9546ec4 100644 --- a/.github/workflows/labgraph_audiogen.yml +++ b/.github/workflows/labgraph_audiogen.yml @@ -4,7 +4,7 @@ on: [push] jobs: build: - runs-on: ubuntu-latest + runs-on: self-hosted steps: - name: Checkout code From fbed041d6e046cbd55a8265e47603e0a624d9922 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 27 Oct 2023 00:21:18 -0400 Subject: [PATCH 16/58] test only file creation --- extensions/labgraph_audiogen/tests/test_main.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/extensions/labgraph_audiogen/tests/test_main.py b/extensions/labgraph_audiogen/tests/test_main.py index f751f270..4b2dfe64 100644 --- a/extensions/labgraph_audiogen/tests/test_main.py +++ b/extensions/labgraph_audiogen/tests/test_main.py @@ -5,9 +5,6 @@ def test_main(): # Run the script with an example description process = subprocess.run(["labgraph_audiogen", "--description", "dog barking"], capture_output=True, text=True) - # Assert that the script ran successfully - assert process.returncode == 0, f"Script returned {process.returncode}, expected 0. stdout: {process.stdout}, stderr: {process.stderr}" - # Assert that the output file was created assert os.path.exists("0.wav"), "Output file 0.wav was not created" os.remove("0.wav") \ No newline at end of file From 4e4f97c5f52ff413a208c8832b92f200f63b838d Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 3 Nov 2023 12:07:10 -0400 Subject: [PATCH 17/58] Refactors code, changes argparse to @click, Docstr --- .../labgraph_audiogen/main.py | 56 ++++++++++++------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/extensions/labgraph_audiogen/labgraph_audiogen/main.py b/extensions/labgraph_audiogen/labgraph_audiogen/main.py index bc902e83..504fafc3 100644 --- a/extensions/labgraph_audiogen/labgraph_audiogen/main.py +++ b/extensions/labgraph_audiogen/labgraph_audiogen/main.py @@ -1,32 +1,46 @@ -import sys -import argparse -import torchaudio +import click +import torch from audiocraft.models import AudioGen from audiocraft.data.audio import audio_write -def main(args=None): - # Parse arguments - parser = argparse.ArgumentParser(description='Generate audio from descriptions using Audiocraft\'s AudioGen.') - parser.add_argument('--description', nargs='+', type=str, help='Description of the generated audio.') - parser.add_argument('--duration', type=int, default=5, help='Duration of the generated audio.') - if args is None: - args = parser.parse_args() - else: - args = parser.parse_args(args) +DEFAULT_AUDIOGEN_MODEL = 'facebook/audiogen-medium' +DEFAULT_AUDIO_DURATION = 5 - print(f"Running labgraph_audiogen with description: {args.description}") +@click.command() +@click.argument('description', nargs=-1, required=True) +@click.option('--duration', '-d', default=DEFAULT_AUDIO_DURATION, help='Duration of the generated audio.') +@click.option('--model', '-m', default=DEFAULT_AUDIOGEN_MODEL, help='Name of the Audiocraft AudioGen model to use.') +@click.option('--output', '-o', help='Name of the output file.') +def parse_arguments(description, duration, model, output): + """ + Generates audio from description using Audiocraft's AudioGen. + """ + description = ' '.join(description) + if output is None: + output = description[:10] + + run_audio_generation(description, duration, model, output) + + +def run_audio_generation(description, duration, model_name, output): + """ + Load Audiocraft's AudioGen model and generate audio from the description. + + :param description: The parsed arguments. + :param duration: Duration of the generated audio. + :param model_name: Name of the Audiocraft AudioGen model to use. + :param output: Name of the output file. + """ + print(f"Running labgraph_audiogen with description: {description}") # Load Audiocraft's AudioGen model and set generation params. - model = AudioGen.get_pretrained('facebook/audiogen-medium') - model.set_generation_params(duration=args.duration) + model = AudioGen.get_pretrained(model_name) + model.set_generation_params(duration=duration) # Generate audio from the description - wav = model.generate(args.description) + wav = model.generate([description]) # Save the generated audios. for idx, one_wav in enumerate(wav): - # Will save under {idx}.wav, with loudness normalization at -14 db LUFS. - audio_write(f'{idx}', one_wav.cpu(), model.sample_rate, strategy="loudness", loudness_compressor=True) - -if __name__ == "__main__": - main() \ No newline at end of file + # Will save under {output}{idx}.wav, with loudness normalization at -14 db LUFS. + audio_write(f'{output}', one_wav.cpu(), model.sample_rate, strategy="loudness", loudness_compressor=True) \ No newline at end of file From 3532104bb13f1d99973072ac3cd4c1e5f88d9b7d Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 3 Nov 2023 12:07:30 -0400 Subject: [PATCH 18/58] Changes entry point --- extensions/labgraph_audiogen/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/labgraph_audiogen/setup.py b/extensions/labgraph_audiogen/setup.py index 35f3a842..6e889587 100644 --- a/extensions/labgraph_audiogen/setup.py +++ b/extensions/labgraph_audiogen/setup.py @@ -12,6 +12,6 @@ ], entry_points=''' [console_scripts] - labgraph_audiogen=labgraph_audiogen.main:main + labgraph_audiogen=labgraph_audiogen.main:parse_arguments ''', ) \ No newline at end of file From f4e3ca6c46f74547267eb38fbe66f55ff6a1a400 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 3 Nov 2023 12:53:13 -0400 Subject: [PATCH 19/58] adds batch functionality with file input --- .../labgraph_audiogen/main.py | 41 +++++++++++-------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/extensions/labgraph_audiogen/labgraph_audiogen/main.py b/extensions/labgraph_audiogen/labgraph_audiogen/main.py index 504fafc3..cbf26c42 100644 --- a/extensions/labgraph_audiogen/labgraph_audiogen/main.py +++ b/extensions/labgraph_audiogen/labgraph_audiogen/main.py @@ -7,40 +7,49 @@ DEFAULT_AUDIO_DURATION = 5 @click.command() -@click.argument('description', nargs=-1, required=True) +@click.argument('description', nargs=-1, required=False) @click.option('--duration', '-d', default=DEFAULT_AUDIO_DURATION, help='Duration of the generated audio.') @click.option('--model', '-m', default=DEFAULT_AUDIOGEN_MODEL, help='Name of the Audiocraft AudioGen model to use.') @click.option('--output', '-o', help='Name of the output file.') -def parse_arguments(description, duration, model, output): +@click.option('--batch', type=click.Path(), help='File name for batch audio description.') +def parse_arguments(description, duration, model, output, batch): """ Generates audio from description using Audiocraft's AudioGen. """ - description = ' '.join(description) - if output is None: - output = description[:10] - - run_audio_generation(description, duration, model, output) - - -def run_audio_generation(description, duration, model_name, output): + if batch: + try: + with open(batch, 'r') as f: + descriptions = [line.strip() for line in f.readlines()] + except FileNotFoundError: + print(f"File {batch} not found. Please check the file path and try again.") + else: + if not description: + raise click.BadParameter("Description argument is required when not using --batch.") + descriptions = [' '.join(description)] + + run_audio_generation(descriptions, duration, model, output) + +def run_audio_generation(descriptions, duration, model_name, output): """ Load Audiocraft's AudioGen model and generate audio from the description. - :param description: The parsed arguments. + :param descriptions: The parsed arguments. :param duration: Duration of the generated audio. :param model_name: Name of the Audiocraft AudioGen model to use. :param output: Name of the output file. """ - print(f"Running labgraph_audiogen with description: {description}") + print(f"Running labgraph_audiogen with descriptions: {descriptions}") # Load Audiocraft's AudioGen model and set generation params. model = AudioGen.get_pretrained(model_name) model.set_generation_params(duration=duration) - # Generate audio from the description - wav = model.generate([description]) - + # Generate audio from the descriptions + wav = model.generate(descriptions) + batch_output = output # Save the generated audios. for idx, one_wav in enumerate(wav): # Will save under {output}{idx}.wav, with loudness normalization at -14 db LUFS. - audio_write(f'{output}', one_wav.cpu(), model.sample_rate, strategy="loudness", loudness_compressor=True) \ No newline at end of file + if not output: + batch_output = descriptions[idx].replace(' ', '_') + audio_write(f'{batch_output}{idx}', one_wav.cpu(), model.sample_rate, strategy="loudness", loudness_compressor=True) \ No newline at end of file From 10cc1bebe94b39e08b3f96f40e0807b2c7457e1c Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 3 Nov 2023 13:01:39 -0400 Subject: [PATCH 20/58] Checks if file was created --- .github/workflows/labgraph_audiogen.yml | 2 +- extensions/labgraph_audiogen/tests/test_main.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/labgraph_audiogen.yml b/.github/workflows/labgraph_audiogen.yml index c9546ec4..aae373ba 100644 --- a/.github/workflows/labgraph_audiogen.yml +++ b/.github/workflows/labgraph_audiogen.yml @@ -4,7 +4,7 @@ on: [push] jobs: build: - runs-on: self-hosted + runs-on: ubuntu-latest steps: - name: Checkout code diff --git a/extensions/labgraph_audiogen/tests/test_main.py b/extensions/labgraph_audiogen/tests/test_main.py index 4b2dfe64..16714bd5 100644 --- a/extensions/labgraph_audiogen/tests/test_main.py +++ b/extensions/labgraph_audiogen/tests/test_main.py @@ -1,10 +1,10 @@ import os import subprocess -def test_main(): +def test_single_description(): # Run the script with an example description - process = subprocess.run(["labgraph_audiogen", "--description", "dog barking"], capture_output=True, text=True) + process = subprocess.run(["labgraph_audiogen", "dog barking", "dog barking"], capture_output=True, text=True) # Assert that the output file was created - assert os.path.exists("0.wav"), "Output file 0.wav was not created" - os.remove("0.wav") \ No newline at end of file + assert os.path.exists("dog_barking0.wav"), "Output file dog_barking0.wav was not created" + os.remove("dog_barking0.wav") \ No newline at end of file From 730579ba69ca66965658c94d3908aa6476f4fd9b Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 3 Nov 2023 13:23:57 -0400 Subject: [PATCH 21/58] linting + consistency --- extensions/labgraph_audiogen/labgraph_audiogen/main.py | 8 ++++---- extensions/labgraph_audiogen/tests/test_main.py | 9 ++++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/extensions/labgraph_audiogen/labgraph_audiogen/main.py b/extensions/labgraph_audiogen/labgraph_audiogen/main.py index cbf26c42..2e1bd8be 100644 --- a/extensions/labgraph_audiogen/labgraph_audiogen/main.py +++ b/extensions/labgraph_audiogen/labgraph_audiogen/main.py @@ -11,14 +11,14 @@ @click.option('--duration', '-d', default=DEFAULT_AUDIO_DURATION, help='Duration of the generated audio.') @click.option('--model', '-m', default=DEFAULT_AUDIOGEN_MODEL, help='Name of the Audiocraft AudioGen model to use.') @click.option('--output', '-o', help='Name of the output file.') -@click.option('--batch', type=click.Path(), help='File name for batch audio description.') +@click.option('--batch', '-b', type=click.Path(), help='File name for batch audio description.') def parse_arguments(description, duration, model, output, batch): """ Generates audio from description using Audiocraft's AudioGen. """ if batch: try: - with open(batch, 'r') as f: + with open(batch, mode='r', encoding='utf-8') as f: descriptions = [line.strip() for line in f.readlines()] except FileNotFoundError: print(f"File {batch} not found. Please check the file path and try again.") @@ -26,7 +26,6 @@ def parse_arguments(description, duration, model, output, batch): if not description: raise click.BadParameter("Description argument is required when not using --batch.") descriptions = [' '.join(description)] - run_audio_generation(descriptions, duration, model, output) def run_audio_generation(descriptions, duration, model_name, output): @@ -52,4 +51,5 @@ def run_audio_generation(descriptions, duration, model_name, output): # Will save under {output}{idx}.wav, with loudness normalization at -14 db LUFS. if not output: batch_output = descriptions[idx].replace(' ', '_') - audio_write(f'{batch_output}{idx}', one_wav.cpu(), model.sample_rate, strategy="loudness", loudness_compressor=True) \ No newline at end of file + audio_write(f'{batch_output}{idx}', one_wav.cpu(), + model.sample_rate, strategy="loudness", loudness_compressor=True) diff --git a/extensions/labgraph_audiogen/tests/test_main.py b/extensions/labgraph_audiogen/tests/test_main.py index 16714bd5..1763c56d 100644 --- a/extensions/labgraph_audiogen/tests/test_main.py +++ b/extensions/labgraph_audiogen/tests/test_main.py @@ -2,9 +2,12 @@ import subprocess def test_single_description(): + ''' + Tests output with a single description + ''' # Run the script with an example description - process = subprocess.run(["labgraph_audiogen", "dog barking", "dog barking"], capture_output=True, text=True) - + subprocess.run(["labgraph_audiogen", "dog barking"], + capture_output=True, text=True, check=False) # Assert that the output file was created assert os.path.exists("dog_barking0.wav"), "Output file dog_barking0.wav was not created" - os.remove("dog_barking0.wav") \ No newline at end of file + os.remove("dog_barking0.wav") From acb9b644f2ae6d6b9176901237e4e42bff7f94cb Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 3 Nov 2023 13:24:23 -0400 Subject: [PATCH 22/58] README instructions --- extensions/labgraph_audiogen/README.md | 49 +++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/extensions/labgraph_audiogen/README.md b/extensions/labgraph_audiogen/README.md index b287e7db..91ab73c6 100644 --- a/extensions/labgraph_audiogen/README.md +++ b/extensions/labgraph_audiogen/README.md @@ -1 +1,48 @@ -# Audiocraft CLI ReadME \ No newline at end of file +# Audiogen + +Audiogen is a Python command-line tool that uses models from Audiocraft's AudioGen to generate audio from specified descriptions. This tool can generate a single piece of audio based on a specific description or multiple pieces of audio based on a batch file containing multiple descriptions. + +## Features + +* Ability to specify duration of the generated audio. +* Ability to generate audio based on a batch file. +* Ability to specify the model to be used for the audio generation. +* Ability to set the output file name. + +## Setup + +Audiocraft needs Python 3.8 or higher to run. If you have a suitable version of Python installed, you can install Audiogen with pip: + +```shell +pip install -e . +``` + +## Usage + +### Command-line interface + +The CLI usage for Audiogen is `labgraph_audiogen [OPTIONS] [DESCRIPTION]...`. + +### Options + +* `description`: the description based on which the audio is to be generated. +* `duration, -d`: duration of the generated audio, default is 5. +* `model, -m`: name of the Audiocraft AudioGen model to use, default is 'facebook/audiogen-medium'. +* `output, -o`: name of the output file. +* `batch`: file name for batch audio description. + +### Example + +To generate an audio file you would use the following command: + +```shell +labgraph_audiogen -d 5 -m 'facebook/audiogen-medium' -o 'my_output' 'dog barking' + +labgraph_audiogen 'dog barking' + +labgraph_audiogen -b 'batch.txt' +``` + +## Error Handling + +If the batch file is not found, a notable error message will be presented. Moreover, if a description is not provided when not using a batch file, a misusage error will be raised. From a11e54dbc86a242ff86f195c160b03191e5627ce Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Thu, 16 Nov 2023 19:12:59 -0500 Subject: [PATCH 23/58] Switch from labgraph_audiogen to lg_audiogen --- .../{labgraph_audiogen => lg_audiogen}/README.md | 8 ++++---- .../lg_audiogen}/__init__.py | 0 .../lg_audiogen}/main.py | 10 +++++----- extensions/{labgraph_audiogen => lg_audiogen}/setup.py | 4 ++-- .../tests/test_main.py | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) rename extensions/{labgraph_audiogen => lg_audiogen}/README.md (86%) rename extensions/{labgraph_audiogen/labgraph_audiogen => lg_audiogen/lg_audiogen}/__init__.py (100%) rename extensions/{labgraph_audiogen/labgraph_audiogen => lg_audiogen/lg_audiogen}/main.py (88%) rename extensions/{labgraph_audiogen => lg_audiogen}/setup.py (75%) rename extensions/{labgraph_audiogen => lg_audiogen}/tests/test_main.py (87%) diff --git a/extensions/labgraph_audiogen/README.md b/extensions/lg_audiogen/README.md similarity index 86% rename from extensions/labgraph_audiogen/README.md rename to extensions/lg_audiogen/README.md index 91ab73c6..79b5badf 100644 --- a/extensions/labgraph_audiogen/README.md +++ b/extensions/lg_audiogen/README.md @@ -21,7 +21,7 @@ pip install -e . ### Command-line interface -The CLI usage for Audiogen is `labgraph_audiogen [OPTIONS] [DESCRIPTION]...`. +The CLI usage for Audiogen is `lg_audiogen [OPTIONS] [DESCRIPTION]...`. ### Options @@ -36,11 +36,11 @@ The CLI usage for Audiogen is `labgraph_audiogen [OPTIONS] [DESCRIPTION]...`. To generate an audio file you would use the following command: ```shell -labgraph_audiogen -d 5 -m 'facebook/audiogen-medium' -o 'my_output' 'dog barking' +lg_audiogen -d 5 -m 'facebook/audiogen-medium' -o 'my_output' 'dog barking' -labgraph_audiogen 'dog barking' +lg_audiogen 'dog barking' -labgraph_audiogen -b 'batch.txt' +lg_audiogen -b 'batch.txt' ``` ## Error Handling diff --git a/extensions/labgraph_audiogen/labgraph_audiogen/__init__.py b/extensions/lg_audiogen/lg_audiogen/__init__.py similarity index 100% rename from extensions/labgraph_audiogen/labgraph_audiogen/__init__.py rename to extensions/lg_audiogen/lg_audiogen/__init__.py diff --git a/extensions/labgraph_audiogen/labgraph_audiogen/main.py b/extensions/lg_audiogen/lg_audiogen/main.py similarity index 88% rename from extensions/labgraph_audiogen/labgraph_audiogen/main.py rename to extensions/lg_audiogen/lg_audiogen/main.py index 2e1bd8be..6e738e23 100644 --- a/extensions/labgraph_audiogen/labgraph_audiogen/main.py +++ b/extensions/lg_audiogen/lg_audiogen/main.py @@ -32,12 +32,12 @@ def run_audio_generation(descriptions, duration, model_name, output): """ Load Audiocraft's AudioGen model and generate audio from the description. - :param descriptions: The parsed arguments. - :param duration: Duration of the generated audio. - :param model_name: Name of the Audiocraft AudioGen model to use. - :param output: Name of the output file. + @param descriptions: The parsed arguments. + @param duration: Duration of the generated audio. + @param model_name: Name of the Audiocraft AudioGen model to use. + @param output: Name of the output file. """ - print(f"Running labgraph_audiogen with descriptions: {descriptions}") + print(f"Running lg_audiogen with descriptions: {descriptions}") # Load Audiocraft's AudioGen model and set generation params. model = AudioGen.get_pretrained(model_name) diff --git a/extensions/labgraph_audiogen/setup.py b/extensions/lg_audiogen/setup.py similarity index 75% rename from extensions/labgraph_audiogen/setup.py rename to extensions/lg_audiogen/setup.py index 6e889587..f1e03431 100644 --- a/extensions/labgraph_audiogen/setup.py +++ b/extensions/lg_audiogen/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages setup( - name='labgraph_audiogen', + name='lg_audiogen', version='0.1', description="Audio generation on labgraph", packages=find_packages(), @@ -12,6 +12,6 @@ ], entry_points=''' [console_scripts] - labgraph_audiogen=labgraph_audiogen.main:parse_arguments + lg_audiogen=lg_audiogen.main:parse_arguments ''', ) \ No newline at end of file diff --git a/extensions/labgraph_audiogen/tests/test_main.py b/extensions/lg_audiogen/tests/test_main.py similarity index 87% rename from extensions/labgraph_audiogen/tests/test_main.py rename to extensions/lg_audiogen/tests/test_main.py index 1763c56d..c398fcaf 100644 --- a/extensions/labgraph_audiogen/tests/test_main.py +++ b/extensions/lg_audiogen/tests/test_main.py @@ -6,7 +6,7 @@ def test_single_description(): Tests output with a single description ''' # Run the script with an example description - subprocess.run(["labgraph_audiogen", "dog barking"], + subprocess.run(["lg_audiogen", "dog barking"], capture_output=True, text=True, check=False) # Assert that the output file was created assert os.path.exists("dog_barking0.wav"), "Output file dog_barking0.wav was not created" From 1317661db46fe1fcae00216bd2fcae9c048a5620 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Thu, 16 Nov 2023 19:44:08 -0500 Subject: [PATCH 24/58] Add versions + Improve descriptions --- extensions/lg_audiogen/setup.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/extensions/lg_audiogen/setup.py b/extensions/lg_audiogen/setup.py index f1e03431..220198d5 100644 --- a/extensions/lg_audiogen/setup.py +++ b/extensions/lg_audiogen/setup.py @@ -3,12 +3,17 @@ setup( name='lg_audiogen', version='0.1', - description="Audio generation on labgraph", + description="A Command-line interface to use Audiocraft for labgraph", + long_description=""" + A Command-line interface to facilitate the usage of Audiocraft's models + to generate and process audio on labgraph + """, packages=find_packages(), install_requires=[ - 'Click', - "torchaudio", - "audiocraft", + "Click>=8.1.7", + "torch>=2.1.0", + "torchaudio>=2.1.0", + "audiocraft==1.1.0", ], entry_points=''' [console_scripts] From 9d5bebfab7310ab27a370ccbc494867fa56feee3 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Thu, 16 Nov 2023 19:44:40 -0500 Subject: [PATCH 25/58] Adds ffmpeg to fix workflow --- .github/workflows/labgraph_audiogen.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/labgraph_audiogen.yml b/.github/workflows/labgraph_audiogen.yml index aae373ba..3a68339d 100644 --- a/.github/workflows/labgraph_audiogen.yml +++ b/.github/workflows/labgraph_audiogen.yml @@ -19,6 +19,7 @@ jobs: run: | cd extensions/labgraph_audiogen python -m pip install --upgrade pip + sudo apt-get install ffmpeg pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu pip install --pre xformers pip install -e . From ecb2d04ede78276a2a5c2b185cc8cf15c516ca29 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Thu, 16 Nov 2023 19:48:34 -0500 Subject: [PATCH 26/58] fix package name to lg_audiogen --- .github/workflows/labgraph_audiogen.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/labgraph_audiogen.yml b/.github/workflows/labgraph_audiogen.yml index 3a68339d..57bc4372 100644 --- a/.github/workflows/labgraph_audiogen.yml +++ b/.github/workflows/labgraph_audiogen.yml @@ -17,7 +17,7 @@ jobs: - name: Install dependencies run: | - cd extensions/labgraph_audiogen + cd extensions/lg_audiogen python -m pip install --upgrade pip sudo apt-get install ffmpeg pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu @@ -27,5 +27,5 @@ jobs: - name: Run tests run: | - cd extensions/labgraph_audiogen + cd extensions/lg_audiogen pytest -vvv \ No newline at end of file From 94aee5c3fa3f1e2705af22844c165609a24fb9d6 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Thu, 16 Nov 2023 20:18:47 -0500 Subject: [PATCH 27/58] Adds O.S Support on ReadME --- extensions/lg_audiogen/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/extensions/lg_audiogen/README.md b/extensions/lg_audiogen/README.md index 79b5badf..5a3ca7d9 100644 --- a/extensions/lg_audiogen/README.md +++ b/extensions/lg_audiogen/README.md @@ -43,6 +43,10 @@ lg_audiogen 'dog barking' lg_audiogen -b 'batch.txt' ``` +## O.S Support + +```Tested on Ubuntu 22.04 (Jammy) LTS``` + ## Error Handling If the batch file is not found, a notable error message will be presented. Moreover, if a description is not provided when not using a batch file, a misusage error will be raised. From d5e347ac8e9352eea517d14d8e3e4346d9dd3a4a Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Thu, 16 Nov 2023 21:27:57 -0500 Subject: [PATCH 28/58] Improve ReadME with samples + batch instructions --- extensions/lg_audiogen/README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/extensions/lg_audiogen/README.md b/extensions/lg_audiogen/README.md index 5a3ca7d9..dad65c03 100644 --- a/extensions/lg_audiogen/README.md +++ b/extensions/lg_audiogen/README.md @@ -43,6 +43,22 @@ lg_audiogen 'dog barking' lg_audiogen -b 'batch.txt' ``` +### Batch File Format + +The batch file should contain one description per line. The descriptions should be in the same format as the descriptions used in the command-line interface. + +Example: + +*batch.txt* +```txt +Natural sounds of a rainforest +Bird Chirping in the background +``` + +### Samples + +[Google Drive Folder](https://drive.google.com/drive/folders/1kdWB1CBog4NGVJ7jWddKLtBAuPm3gwDq?usp=drive_link) + ## O.S Support ```Tested on Ubuntu 22.04 (Jammy) LTS``` From abfc11ee6e52c1913ac78fdbdc03f98772b6a674 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Sun, 26 Nov 2023 23:19:25 -0500 Subject: [PATCH 29/58] Add Calendar Reader Utility --- extensions/lg_audiogen/lg_audiogen/calendar_reader.py | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 extensions/lg_audiogen/lg_audiogen/calendar_reader.py diff --git a/extensions/lg_audiogen/lg_audiogen/calendar_reader.py b/extensions/lg_audiogen/lg_audiogen/calendar_reader.py new file mode 100644 index 00000000..109c5bf2 --- /dev/null +++ b/extensions/lg_audiogen/lg_audiogen/calendar_reader.py @@ -0,0 +1,7 @@ +from icalendar import Calendar + +# Read the user's calendar file and parse it into an icalendar object +with open('calendar.ics', 'r') as f: + gcal = Calendar.from_ical(f.read()) + +print(gcal) \ No newline at end of file From 3d7bdf9bd320aaceecc435be23b0f848d7dabff8 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Mon, 27 Nov 2023 00:10:13 -0500 Subject: [PATCH 30/58] Builds calendar event dictionary --- .../lg_audiogen/calendar_reader.py | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/extensions/lg_audiogen/lg_audiogen/calendar_reader.py b/extensions/lg_audiogen/lg_audiogen/calendar_reader.py index 109c5bf2..44ca45a4 100644 --- a/extensions/lg_audiogen/lg_audiogen/calendar_reader.py +++ b/extensions/lg_audiogen/lg_audiogen/calendar_reader.py @@ -1,7 +1,25 @@ from icalendar import Calendar +from datetime import datetime, date, timedelta # Read the user's calendar file and parse it into an icalendar object with open('calendar.ics', 'r') as f: gcal = Calendar.from_ical(f.read()) -print(gcal) \ No newline at end of file +# holds data in the format {'2023-11-06': [Event]} of the user's calendar +calendar_events = {} + +for component in gcal.walk(): + if component.name == "VEVENT": + # Extract information about the event + summary = str(component.get('summary')) + start_dt = component.get('dtstart').dt + end_dt = component.get('dtend').dt + duration = int((end_dt - start_dt).total_seconds() / 60) # duration in minutes + + dt_str = start_dt.strftime('%Y-%m-%d') # Dict key is the date in the format YYYY-MM-DD + if dt_str not in calendar_events: + calendar_events[dt_str] = [] + + calendar_events[dt_str].append({'name': summary, 'duration': duration}) + +print(calendar_events) \ No newline at end of file From 0fc95f9ad95431c33b2cdce038d3b38a3e7bd547 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Mon, 27 Nov 2023 00:53:05 -0500 Subject: [PATCH 31/58] handles recurring events and set event limits --- .../lg_audiogen/calendar_reader.py | 37 +++++++++++++++---- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/extensions/lg_audiogen/lg_audiogen/calendar_reader.py b/extensions/lg_audiogen/lg_audiogen/calendar_reader.py index 44ca45a4..65e33606 100644 --- a/extensions/lg_audiogen/lg_audiogen/calendar_reader.py +++ b/extensions/lg_audiogen/lg_audiogen/calendar_reader.py @@ -1,5 +1,9 @@ from icalendar import Calendar from datetime import datetime, date, timedelta +from dateutil.rrule import rrulestr + +MAX_YEAR = 2023 +CURRENT_YEAR = datetime.now().year # Read the user's calendar file and parse it into an icalendar object with open('calendar.ics', 'r') as f: @@ -8,6 +12,10 @@ # holds data in the format {'2023-11-06': [Event]} of the user's calendar calendar_events = {} +# We only to get the events from the current year to the max year +def is_within_limit(dt): + return CURRENT_YEAR <= dt.year <= MAX_YEAR + for component in gcal.walk(): if component.name == "VEVENT": # Extract information about the event @@ -15,11 +23,26 @@ start_dt = component.get('dtstart').dt end_dt = component.get('dtend').dt duration = int((end_dt - start_dt).total_seconds() / 60) # duration in minutes - - dt_str = start_dt.strftime('%Y-%m-%d') # Dict key is the date in the format YYYY-MM-DD - if dt_str not in calendar_events: - calendar_events[dt_str] = [] - - calendar_events[dt_str].append({'name': summary, 'duration': duration}) -print(calendar_events) \ No newline at end of file + # Calculate the recurrence rule, if any + if 'rrule' in component: + rr = rrulestr(component.get('rrule').to_ical().decode('utf-8'), dtstart=start_dt) + for dt in rr: + if not is_within_limit(dt): + continue + dt_str = dt.strftime('%Y-%m-%d') + if dt_str not in calendar_events: + calendar_events[dt_str] = [] + calendar_events[dt_str].append({'name': summary, 'duration': duration}) + else: + dt_str = start_dt.strftime('%Y-%m-%d') + if dt_str not in calendar_events: + calendar_events[dt_str] = [] + calendar_events[dt_str].append({'name': summary, 'duration': duration}) + +for date, events in sorted(calendar_events.items()): + print(f"Date: {date}") + for event in events: + print(f" Event: {event['name']}") + print(f" Duration: {event['duration']} minutes") + print() \ No newline at end of file From 2d063159e31430a0eb287e349197eef02c85dd11 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Mon, 27 Nov 2023 01:10:02 -0500 Subject: [PATCH 32/58] Add Year limitation for non-recurring events --- extensions/lg_audiogen/lg_audiogen/calendar_reader.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/extensions/lg_audiogen/lg_audiogen/calendar_reader.py b/extensions/lg_audiogen/lg_audiogen/calendar_reader.py index 65e33606..793c5d99 100644 --- a/extensions/lg_audiogen/lg_audiogen/calendar_reader.py +++ b/extensions/lg_audiogen/lg_audiogen/calendar_reader.py @@ -36,6 +36,8 @@ def is_within_limit(dt): calendar_events[dt_str].append({'name': summary, 'duration': duration}) else: dt_str = start_dt.strftime('%Y-%m-%d') + if not is_within_limit(start_dt): + continue if dt_str not in calendar_events: calendar_events[dt_str] = [] calendar_events[dt_str].append({'name': summary, 'duration': duration}) From 05b95545a8b418c3247970a595ff8a94f7aeae67 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Mon, 27 Nov 2023 01:26:43 -0500 Subject: [PATCH 33/58] Speeds up loop by breaking from rrule generator --- .../lg_audiogen/lg_audiogen/calendar_reader.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/extensions/lg_audiogen/lg_audiogen/calendar_reader.py b/extensions/lg_audiogen/lg_audiogen/calendar_reader.py index 793c5d99..4a2db539 100644 --- a/extensions/lg_audiogen/lg_audiogen/calendar_reader.py +++ b/extensions/lg_audiogen/lg_audiogen/calendar_reader.py @@ -24,16 +24,18 @@ def is_within_limit(dt): end_dt = component.get('dtend').dt duration = int((end_dt - start_dt).total_seconds() / 60) # duration in minutes - # Calculate the recurrence rule, if any + # Builds up the missing events that are defined by the recurring rules + # Ex: Meetings that happen every M, W, F if 'rrule' in component: + # rr is a generator rr = rrulestr(component.get('rrule').to_ical().decode('utf-8'), dtstart=start_dt) - for dt in rr: - if not is_within_limit(dt): - continue - dt_str = dt.strftime('%Y-%m-%d') - if dt_str not in calendar_events: - calendar_events[dt_str] = [] - calendar_events[dt_str].append({'name': summary, 'duration': duration}) + for dt in rr: + if not is_within_limit(dt): # Year Out of bounds + break + dt_str = dt.strftime('%Y-%m-%d') + if dt_str not in calendar_events: + calendar_events[dt_str] = [] + calendar_events[dt_str].append({'name': summary, 'duration': duration}) else: dt_str = start_dt.strftime('%Y-%m-%d') if not is_within_limit(start_dt): From 112db66991cdb3f5dbc661dfaf59222240c7bbe3 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Mon, 27 Nov 2023 02:18:46 -0500 Subject: [PATCH 34/58] Refactor code into functions & remove redundancies --- .../lg_audiogen/calendar_reader.py | 85 ++++++++++--------- 1 file changed, 47 insertions(+), 38 deletions(-) diff --git a/extensions/lg_audiogen/lg_audiogen/calendar_reader.py b/extensions/lg_audiogen/lg_audiogen/calendar_reader.py index 4a2db539..6d3013a1 100644 --- a/extensions/lg_audiogen/lg_audiogen/calendar_reader.py +++ b/extensions/lg_audiogen/lg_audiogen/calendar_reader.py @@ -2,48 +2,57 @@ from datetime import datetime, date, timedelta from dateutil.rrule import rrulestr -MAX_YEAR = 2023 -CURRENT_YEAR = datetime.now().year +# Inclusive [2021, 2021] +MIN_YEAR = datetime.now().year +MAX_YEAR = MIN_YEAR -# Read the user's calendar file and parse it into an icalendar object -with open('calendar.ics', 'r') as f: - gcal = Calendar.from_ical(f.read()) +# We only to get the events from the minimum year to the max year +def is_within_limit(dt): + return MIN_YEAR <= dt.year <= MAX_YEAR -# holds data in the format {'2023-11-06': [Event]} of the user's calendar -calendar_events = {} +def populate_events(component, start_dt, calendar_events, summary, duration): + if not is_within_limit(start_dt): + return 0 + dt_str = start_dt.strftime('%Y-%m-%d') + if dt_str not in calendar_events: + calendar_events[dt_str] = [] + calendar_events[dt_str].append({'name': summary, 'duration': duration}) + return 1 -# We only to get the events from the current year to the max year -def is_within_limit(dt): - return CURRENT_YEAR <= dt.year <= MAX_YEAR - -for component in gcal.walk(): - if component.name == "VEVENT": - # Extract information about the event - summary = str(component.get('summary')) - start_dt = component.get('dtstart').dt - end_dt = component.get('dtend').dt - duration = int((end_dt - start_dt).total_seconds() / 60) # duration in minutes - - # Builds up the missing events that are defined by the recurring rules - # Ex: Meetings that happen every M, W, F - if 'rrule' in component: - # rr is a generator - rr = rrulestr(component.get('rrule').to_ical().decode('utf-8'), dtstart=start_dt) - for dt in rr: - if not is_within_limit(dt): # Year Out of bounds - break - dt_str = dt.strftime('%Y-%m-%d') - if dt_str not in calendar_events: - calendar_events[dt_str] = [] - calendar_events[dt_str].append({'name': summary, 'duration': duration}) - else: - dt_str = start_dt.strftime('%Y-%m-%d') - if not is_within_limit(start_dt): - continue - if dt_str not in calendar_events: - calendar_events[dt_str] = [] - calendar_events[dt_str].append({'name': summary, 'duration': duration}) +def populate_recurring_events(component, start_dt, calendar_events, summary, duration): + # rr will give us a generator + rr = rrulestr(component.get('rrule').to_ical().decode('utf-8'), dtstart=start_dt) + for dt in rr: + if populate_events(component, dt, calendar_events, summary, duration) == 0: + return # short circuit if we're out of the range + + +def calendar_to_dictionary(filepath): + # Read the user's calendar file and parse it into an icalendar object + with open(filepath, 'r', encoding='utf-8') as f: + gcal = Calendar.from_ical(f.read()) + + # holds data in the format {'2023-11-06': [Event]} of the user's calendar + calendar_events = {} + + for component in gcal.walk(): + if component.name == "VEVENT": + # Extract information about the event + summary = str(component.get('summary')) + start_dt = component.get('dtstart').dt + end_dt = component.get('dtend').dt + duration = int((end_dt - start_dt).total_seconds() / 60) # duration in minutes + + # rrule Builds up the missing events that are defined by the recurring rules + # Ex: Meetings that happen every M, W, F + if 'rrule' in component: + populate_recurring_events(component, start_dt, calendar_events, summary, duration) + else: + populate_events(component, start_dt, calendar_events, summary, duration) + return calendar_events + +calendar_events = calendar_to_dictionary('calendar.ics') for date, events in sorted(calendar_events.items()): print(f"Date: {date}") for event in events: From 97594ddd02bc3420caa39094786a6c865e4fb9f5 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Mon, 27 Nov 2023 02:39:52 -0500 Subject: [PATCH 35/58] Functions to get the events in given dates --- .../lg_audiogen/lg_audiogen/calendar_reader.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/extensions/lg_audiogen/lg_audiogen/calendar_reader.py b/extensions/lg_audiogen/lg_audiogen/calendar_reader.py index 6d3013a1..8232e572 100644 --- a/extensions/lg_audiogen/lg_audiogen/calendar_reader.py +++ b/extensions/lg_audiogen/lg_audiogen/calendar_reader.py @@ -51,6 +51,24 @@ def calendar_to_dictionary(filepath): populate_events(component, start_dt, calendar_events, summary, duration) return calendar_events + +def get_events_for_specific_date(calendar_events, specific_date_str): + # Assumes specific_date_str is in YYYY-MM-DD format + return calendar_events.get(specific_date_str, []) + +def get_events_between_dates(calendar_events, start_date_str, end_date_str): + # Assumes start_date_str and end_date_str are in YYYY-MM-DD format and start_date <= end_date + start_date = datetime.strptime(start_date_str, '%Y-%m-%d').date() + end_date = datetime.strptime(end_date_str, '%Y-%m-%d').date() + + events_between_dates = {} + current_date = start_date + while current_date <= end_date: + date_str = current_date.strftime('%Y-%m-%d') + if date_str in calendar_events: + events_between_dates[date_str] = calendar_events[date_str] + current_date += timedelta(days=1) + return events_between_dates calendar_events = calendar_to_dictionary('calendar.ics') for date, events in sorted(calendar_events.items()): From bd1b7ff32dbe7a4d75bde26caf6719c5390f42f0 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Mon, 27 Nov 2023 03:22:31 -0500 Subject: [PATCH 36/58] Adds ts to sort events to generate audio in order --- .../lg_audiogen/calendar_reader.py | 38 ++++++++++++++++--- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/extensions/lg_audiogen/lg_audiogen/calendar_reader.py b/extensions/lg_audiogen/lg_audiogen/calendar_reader.py index 8232e572..f56de51d 100644 --- a/extensions/lg_audiogen/lg_audiogen/calendar_reader.py +++ b/extensions/lg_audiogen/lg_audiogen/calendar_reader.py @@ -1,5 +1,5 @@ from icalendar import Calendar -from datetime import datetime, date, timedelta +from datetime import datetime, date, timedelta, timezone from dateutil.rrule import rrulestr # Inclusive [2021, 2021] @@ -10,13 +10,36 @@ def is_within_limit(dt): return MIN_YEAR <= dt.year <= MAX_YEAR +def convert_to_utc(dt): + if isinstance(dt, datetime) and dt.tzinfo is not None and dt.tzinfo.utcoffset(dt) is not None: + # Convert offset-aware datetime to UTC + return dt.astimezone(timezone.utc) + return dt + +def datetime_to_timestamp(dt): + if isinstance(dt, datetime): + return dt.timestamp() + elif isinstance(dt, date): + return datetime.combine(dt, datetime.min.time(), tzinfo=timezone.utc).timestamp() + raise TypeError("Expected datetime.datetime or datetime.date") + def populate_events(component, start_dt, calendar_events, summary, duration): if not is_within_limit(start_dt): - return 0 - dt_str = start_dt.strftime('%Y-%m-%d') + return 0 + + # Ensure dt is converted to UTC if it's a datetime with timezone info. + utc_start_dt = convert_to_utc(start_dt) + # Create timestamp from datetime or date (for sorting later) + timestamp = datetime_to_timestamp(utc_start_dt) + + dt_str = start_dt.strftime('%Y-%m-%d') if isinstance(start_dt, date) \ + else utc_start_dt.strftime('%Y-%m-%d') + if dt_str not in calendar_events: calendar_events[dt_str] = [] - calendar_events[dt_str].append({'name': summary, 'duration': duration}) + + event = {'name': summary, 'duration': duration, 'ts': timestamp} + calendar_events[dt_str].append(event) return 1 def populate_recurring_events(component, start_dt, calendar_events, summary, duration): @@ -54,7 +77,9 @@ def calendar_to_dictionary(filepath): def get_events_for_specific_date(calendar_events, specific_date_str): # Assumes specific_date_str is in YYYY-MM-DD format - return calendar_events.get(specific_date_str, []) + day_events = calendar_events.get(specific_date_str, []) + # Sort events by timestamp key 'ts' in ascending order + return sorted(day_events, key=lambda event: event['ts']) def get_events_between_dates(calendar_events, start_date_str, end_date_str): # Assumes start_date_str and end_date_str are in YYYY-MM-DD format and start_date <= end_date @@ -66,7 +91,8 @@ def get_events_between_dates(calendar_events, start_date_str, end_date_str): while current_date <= end_date: date_str = current_date.strftime('%Y-%m-%d') if date_str in calendar_events: - events_between_dates[date_str] = calendar_events[date_str] + # Sort events for the current date by timestamp key 'ts' in ascending order + events_between_dates[date_str] = sorted(calendar_events[date_str], key=lambda event: event['ts']) current_date += timedelta(days=1) return events_between_dates From ec234f0c51142b42b9f754c8dbfecdd1b82fccaf Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 00:13:20 -0500 Subject: [PATCH 37/58] initial keyword-based prompt generator --- .../lg_audiogen/keyword_generator.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 extensions/lg_audiogen/lg_audiogen/keyword_generator.py diff --git a/extensions/lg_audiogen/lg_audiogen/keyword_generator.py b/extensions/lg_audiogen/lg_audiogen/keyword_generator.py new file mode 100644 index 00000000..bae713a2 --- /dev/null +++ b/extensions/lg_audiogen/lg_audiogen/keyword_generator.py @@ -0,0 +1,27 @@ + +PROMPT_KEYWORDS = { + "commute": ["Sounds of cars honking", "Buzz of a busy metro", "Rhythmic chime of train tracks"], + "beach": ["Sounds of waves hitting rocks", "Seagulls in the background", "Children playing in the sand"], + "workout": ["Beats of high-energy music", "Grunts and clanks of gym equipment", "Breathless intensity of a sprint"], + "dinner": ["Clatter of cutlery on plates", "Murmur of dinner conversation", "Sizzle of food cooking"], +} + +def match_keyword(event_name): + keywords = PROMPT_KEYWORDS.keys() + for keyword in keywords: + if keyword.lower() in event_name.lower(): + return keyword + return None + + +events = [ + {"name": "Commute to work", "duration": 30}, + {"name": "Going to the beach", "duration": 120}, +] + +for event in events: + keyword = match_keyword(event["name"]) + if keyword: + print(f"Event {event['name']} matches keyword {keyword}.") + else: + print(f"Event {event['name']} does not match any keywords.") \ No newline at end of file From f9d56ee4c835c288acb724ecbb29f293728d9297 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 00:26:09 -0500 Subject: [PATCH 38/58] Keyword dict to JSON, adds file load fallback --- .../lg_audiogen/keyword_generator.py | 21 ++++++++++++------ .../static_inputs/prompt_keywords.json | 22 +++++++++++++++++++ 2 files changed, 36 insertions(+), 7 deletions(-) create mode 100644 extensions/lg_audiogen/lg_audiogen/static_inputs/prompt_keywords.json diff --git a/extensions/lg_audiogen/lg_audiogen/keyword_generator.py b/extensions/lg_audiogen/lg_audiogen/keyword_generator.py index bae713a2..52d487ad 100644 --- a/extensions/lg_audiogen/lg_audiogen/keyword_generator.py +++ b/extensions/lg_audiogen/lg_audiogen/keyword_generator.py @@ -1,10 +1,18 @@ +import os +import json -PROMPT_KEYWORDS = { - "commute": ["Sounds of cars honking", "Buzz of a busy metro", "Rhythmic chime of train tracks"], - "beach": ["Sounds of waves hitting rocks", "Seagulls in the background", "Children playing in the sand"], - "workout": ["Beats of high-energy music", "Grunts and clanks of gym equipment", "Breathless intensity of a sprint"], - "dinner": ["Clatter of cutlery on plates", "Murmur of dinner conversation", "Sizzle of food cooking"], -} +# This is the default keyword dictionary. It is a JSON file that maps keywords to prompts +# The CLI will allow the user to input his own dictionary of keywords +THIS_DIR = os.path.dirname(os.path.abspath(__file__)) +KEYWORD_DICT = "/static_inputs/prompt_keywords.json" + +# First Try to load KEYWORD_DICT, if it doesn't work, try with THIS_DIR + KEYWORD_DICT +try: + PROMPT_KEYWORDS = json.load(open(KEYWORD_DICT)) +except FileNotFoundError: + PROMPT_KEYWORDS = json.load(open(THIS_DIR + KEYWORD_DICT)) +except: + raise Exception("Could not load keyword dictionary. Please check that the file exists.") def match_keyword(event_name): keywords = PROMPT_KEYWORDS.keys() @@ -13,7 +21,6 @@ def match_keyword(event_name): return keyword return None - events = [ {"name": "Commute to work", "duration": 30}, {"name": "Going to the beach", "duration": 120}, diff --git a/extensions/lg_audiogen/lg_audiogen/static_inputs/prompt_keywords.json b/extensions/lg_audiogen/lg_audiogen/static_inputs/prompt_keywords.json new file mode 100644 index 00000000..798cdab0 --- /dev/null +++ b/extensions/lg_audiogen/lg_audiogen/static_inputs/prompt_keywords.json @@ -0,0 +1,22 @@ +{ + "commute": [ + "Sounds of cars honking", + "Buzz of a busy metro", + "Rhythmic chime of train tracks" + ], + "beach": [ + "Sounds of waves hitting rocks", + "Seagulls in the background", + "Children playing in the sand" + ], + "workout": [ + "Beats of high-energy music", + "Grunts and clanks of gym equipment", + "Breathless intensity of a sprint" + ], + "dinner": [ + "Clatter of cutlery on plates", + "Murmur of dinner conversation", + "Sizzle of food cooking" + ] +} \ No newline at end of file From c694e4e1bbdce081b568279b1b77eb34a63d6935 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 00:56:06 -0500 Subject: [PATCH 39/58] Adds more variety to keywords & prompts --- .../static_inputs/prompt_keywords.json | 546 +++++++++++++++++- 1 file changed, 528 insertions(+), 18 deletions(-) diff --git a/extensions/lg_audiogen/lg_audiogen/static_inputs/prompt_keywords.json b/extensions/lg_audiogen/lg_audiogen/static_inputs/prompt_keywords.json index 798cdab0..286738c0 100644 --- a/extensions/lg_audiogen/lg_audiogen/static_inputs/prompt_keywords.json +++ b/extensions/lg_audiogen/lg_audiogen/static_inputs/prompt_keywords.json @@ -1,22 +1,532 @@ { + "meeting": [ + "Low hum of projector", + "Shuffling of papers and notebooks", + "Soft taps of laptop keyboards", + "Muffled voices through conference room walls", + "Clear voice of a presenter", + "Opening and closing of a meeting room door", + "Quiet whispers as side conversations", + "Scribbling of pens on flipcharts" + ], + "classes": [ + "Lecture voice echoing in a hall", + "Turning of textbook pages", + "Scratching of pens on paper during exams", + "Bubbling of a chemistry lab experiment", + "Clicks of a calculator", + "Debate and discussion amongst students", + "Laughter and chatter during break", + "Sound of a school bell indicating end of class" + ], + "swimming": [ + "Splashes of water as swimmers dive in", + "Distant whistle of the lifeguard", + "Rhythmic movements in the water", + "Gurgling of water in the pool", + "Echoes of playful shouts in the swimming area", + "Snapping of swim caps", + "Flapping of flip-flops on wet tiles", + "Water cascading from a swimmer's body" + ], + "lunch": [ + "Rustling of snack wrappers", + "Pop of a soda can opening", + "Biting into a crunchy apple", + "Laughter and chatter in the cafeteria", + "Ding of a microwave", + "Silverware clinking in a salad bowl", + "Soup slurping", + "Vending machine dispensing a snack" + ], + "yoga": [ + "Soft instrumental music", + "Deep inhaling and exhaling", + "Stretching and rolling out a yoga mat", + "Quiet voice of instructor guiding the practice", + "Gentle taps of feet on the floor", + "Rustling of comfortable clothing", + "Sound of wind chimes from an open window", + "Peaceful silence held in meditation" + ], + "grocery": [ + "Beep of the scanner at checkout", + "Crinkle of plastic grocery bags", + "Background music of the store", + "Wheels of a shopping cart rolling down aisles", + "Conversations about price checks", + "Thud of produce being weighed", + "Intercom announcements for sales", + "Refrigeration units humming in the frozen section" + ], + "gardening": [ + "Snipping of pruning shears", + "Swoosh of watering from a hose", + "Rustle of plants and leaves", + "Thud of a shovel digging into earth", + "Chirping of birds nearby", + "Creaking of a wheelbarrow", + "Buzzing of bees at work", + "Patting of soil around a newly planted flower" + ], + "birthday": [ + "Laughter and singing of Happy Birthday", + "Pop of balloons being inflated", + "Rustle of gift wrap", + "Crackling of candles being lit", + "Cheers and clapping after blowing out candles", + "Tinkle of ice cubes in party drinks", + "Crunch of chips and snacks", + "Thumping bass of celebratory music" + ], + "concert": [ + "Roar of the crowd cheering", + "Amplified strumming of guitars", + "Powerful voice of the lead singer", + "Thudding bass vibrating the floor", + "Drum solos that resonate through the arena", + "Synthesizers adding electronic harmonies", + "Clapping along to the rhythm of the song", + "Encore chants filling the venue" + ], + "study": [ + "Turning pages of a heavy textbook", + "Click-clack of a keyboard in a quiet room", + "Sighs of concentration and occasional frustration", + "Sipping of coffee or tea while pondering", + "Timer beeping at the end of a study session", + "Whispered readings of notes", + "Highlighters scribbling on key points", + "Silent ambiance of a library" + ], + "hiking": [ + "Leaves crunching underfoot on the trail", + "Bird calls echoing through the trees", + "Streams trickling over rocks", + "Branches snapping as they are moved aside", + "Heavy breathing from the ascent", + "Wind whistling past mountain peaks", + "Zippers of backpacks being opened and closed", + "Distant thunder suggesting an approaching storm" + ], + "travel": [ + "Airport announcements over the PA system", + "Luggage wheels rolling across the floor", + "Language chatter from international travelers", + "Airplane engine starting up", + "Train horn blaring as it arrives", + "Distant sounds of a city new to the traveler", + "Maps being unfolded and studied", + "Camera shutter clicking capturing memories" + ], + "cooking": [ + "Chopping vegetables on a cutting board", + "Grease popping from a hot pan", + "Timer dinging when the oven is preheated", + "Blender whirring while making sauces", + "Cork squeaking out of a wine bottle", + "Faucet running to wash produce", + "Tin foil tearing for covering dishes", + "Spices being shaken into a pot" + ], + "festival": [ + "Carnival music playing in the distance", + "Rides whirring and creaking", + "Children laughing and shouting with joy", + "Food vendors calling out specials", + "Balloons rubbing together and squeaking", + "Fireworks crackling and booming overhead", + "Drum beats from a live band", + "Crowds milling and conversing" + ], + "meditation": [ + "Chime ringing to start the session", + "Gentle bubbling of a water feature", + "Soft rustle of meditation cushions", + "Steady breathing focused and deep", + "Whispers of mantras being repeated", + "Wind rustling leaves outside", + "Silence enveloping the room", + "Faint ting of a singing bowl" + ], + "party": [ + "Upbeat music flooding the room", + "Laughter and lively conversations", + "Clinking of glasses in toasts", + "Heels clicking on the dance floor", + "Bottles opening and pouring drinks", + "Chips dipping into salsa", + "Balloons popping unexpectedly", + "Games and competitions stirring up excitement" + ], + "virtual": [ + "Keyboard tapping during an online chat", + "Mouse clicking navigating a digital interface", + "Notification pings from incoming messages", + "Microphone catching intermittent sounds of breathing", + "Silent pauses during a video call", + "Earphones rubbing against fabric", + "Computer fan whirring quietly", + "Echo of a voice due to a bad connection" + ], + "clubbing": [ + "Bass reverberating through the floor", + "Shouts and cheers to a DJ's beat drop", + "Glasses clinking at the bar", + "Heels stomping rhythmically on the dance floor", + "Securing of wristbands at the entrance", + "Stamps being pressed on hands", + "Doors opening and closing in VIP sections", + "Whistles and yells from the crowd" + ], + "stay": [ + "Keys jingling as they unlock a hotel room", + "Luggage being rolled across a lobby", + "Pages of a book flipping in a quiet space", + "Intermittent sips of a hot beverage", + "Air conditioning starting up in the background", + "Curtains sliding on their rods", + "Remote control clicking as the television turns on", + "Faucet running water into a bath" + ], + "wedding": [ + "Applause as the couple makes their entrance", + "Rice being thrown during the exit", + "Pop of a champagne cork during toasts", + "Tears being dabbed with tissues", + "Chairs scraping as guests move to dance", + "Camera shutters during the photo session", + "Zippers of dresses and the adjusting of suits", + "Laughter from shared memories and jokes" + ], + "brunch": [ + "Froth hissing from an espresso machine", + "Sizzling from breakfast items on a grill", + "Juice being poured into glasses", + "Silverware tinking against mugs for attention", + "Eggshells cracking into a hot pan", + "Baskets of pastries being unwrapped", + "Spoon clinking inside a mixing bowl", + "Toast popping up from toasters" + ], + "expo": [ + "Crowds murmuring at convention center", + "Scanner beeping over tickets and badges", + "Stalls being assembled and disassembled", + "Enthusiasts discussing over exhibited items", + "Presenters speaking through microphones", + "Demo products being tested and tapped", + "Rolling of promotional banners and signs", + "Carts carrying supplies bumping on the floor" + ], + "flight": [ + "Cabin crew walking in aisles", + "Seatbelt clicking shut", + "Overhead compartments being opened and closed", + "Pilot announcements over the intercom", + "Beverage carts rattling down the aisle", + "Aircraft accelerating on the runway", + "Air vents being adjusted", + "Soft snores of a sleeping passenger" + ], + "interview": [ + "Leather seats creaking in an office", + "Handshakes exchanging between participants", + "Writing on a notepad", + "Clothes rustling as candidates adjust in their seats", + "Doors opening and closing as people enter and leave", + "Coughs and clears of the throat nervously", + "Resume papers being shuffled and examined", + "Clock ticking, marking time passing" + ], + "seminar": [ + "Audience applause after a speaker's point", + "Attendees murmuring in agreement or curiosity", + "Microphone feedback before adjustments", + "Presenters shuffling cue cards", + "Laser pointer clicking and pointing at slides", + "Markers squeaking on a whiteboard", + "Chairs sliding as participants lean forward", + "Water bottles being opened during breaks" + ], + "picnic": [ + "Laughter and chatter in open fields", + "Cooler lids closing over ice and refreshments", + "Plastic utensils tapping on paper plates", + "Crunch of biting into fresh fruit", + "Kites whooshing in the wind", + "Dogs barking and playing", + "Blankets flapping before settling on the ground", + "Ice melting and clinking in sun-warmed glasses" + ], + "massage": [ + "Ointment tubes unscrewing and squeezing", + "Gentle tapping of a masseuse's hands", + "Quiet whir of a massage chair", + "Towels being unfolded and draped", + "Oils being warmed and swirled in burners", + "Low sighs of relaxation from clients", + "Pillows being plumped", + "Water trickling from a zen fountain" + ], + "laundry": [ + "Washing machine's rhythmic tumbling", + "Dryer sheets being pulled apart", + "Zippers clanking against the drum", + "Liquid detergent glugging into a cup", + "Clothes being folded on a hard surface", + "Iron hissing steam on fabric", + "Buttons rattling in the spin cycle", + "Hangers clicking as they're hung on a rod" + ], + "presentation": [ + "Slide changes with a soft beep", + "Pointer tapping on a table for emphasis", + "Audience taking notes in hushed tones", + "Cables being connected for multimedia", + "Footsteps as the presenter paces", + "Questions being asked after the talk", + "Hands clapping at the presentation's conclusion", + "Breath being drawn in to calm nerves" + ], + "camping": [ + "Tent fabric flapping in the breeze", + "Zipper of a sleeping bag joining", + "Wood being chopped for a fire", + "Marshmallows crackling in the flame", + "Tree branches snapping underfoot", + "Animal calls at night", + "Streams flowing gently in the background", + "Sleepy yawns in the crisp morning air" + ], + "errands": [ + "Shopping list paper unfolding", + "Car keys jangling and igniting the engine", + "Self-checkout register giving instructions", + "Shopping carts being retrieved and returned", + "Automatic doors whooshing at entrances", + "Pen scribbling to check off items", + "Coins clinking as they're handed to cashiers", + "Plastic bags rustling with purchased goods" + ], + "tailgate": [ + "Grill igniting with a woosh", + "Cans being opened with a fizz", + "Fans cheering for their team", + "Spatula scraping on a griddle", + "Football being caught and thrown", + "Coolers sliding open on truck beds", + "Team chants being bellowed out", + "Foldable chairs snapping into place" + ], + "demo": [ + "Exclamations from onlookers at a surprising feature", + "Machinery whirring during a product test", + "Products being handled and tapped", + "Questions being asked by potential customers", + "Plastic protective film peeling away", + "Samples being distributed", + "Applause at the end of a successful demonstration", + "Pens scribbling notes or feedback" + ], + "class": [ + "Students brainstorming in small groups", + "Pencils filling in multiple-choice bubbles", + "Footsteps of an instructor walking around", + "Backpacks zipping as classes end", + "Chairs screeching slightly against the floor", + "Projector fan blowing softly during a lecture", + "Desks being arranged for a group activity", + "Excited chatter as a classmate answers correctly" + ], + "break": [ + "Steam hissing from a relaxing tea kettle", + "Keyboard keys being left idle", + "Contented sighs of stretching", + "Snack packages crinkling as they are opened", + "Laughter and banter in the break room", + "Clock ticking, counting down the break time", + "Sneakers scuffing on the floor during a quick walk", + "Soda fizzing as it's poured into a cup" + ], + "cleanup": [ + "Brooms sweeping over a floor", + "Trash bags rustling as they're tied up", + "Spray bottles misting cleaners", + "Bins opening and closing for recycling", + "Rags swooshing over surfaces", + "Vacuums humming as they pick up debris", + "Windows squeaking from being wiped down", + "Objects being reorganized on a shelf" + ], + "repair": [ + "Tools clanking in a toolbox", + "Drill whirring into wood or metal", + "Hammer pounding nails into place", + "Screws rattling in a jar", + "Parts being jostled as they're replaced", + "Tape being ripped and applied", + "Objects being tested after fixing", + "Sigh of satisfaction after a job well done" + ], + "nap": [ + "Cloth rustling as one settles in", + "Fan whirling softly in the background", + "Curtains drawing to dim the room", + "Gentle snoring or breathing", + "Clock softly ticking away the minutes", + "Bed springs creaking slightly with movement", + "Soft murmur of daytime ambience", + "Alarm beeping to awaken from the rest" + ], + "read": [ + "Pages turning slowly and deliberately", + "Occasional chuckles or gasps at a plot twist", + "Fingers tapping a rhythm on a book spine", + "Bookmarks sliding into place", + "Glasses being adjusted on the nose", + "Armchair creaking with each shift", + "Throat clearing before continuing to read aloud", + "Sudden silence as concentration deepens" + ], + "game": [ + "Die clattering on a board", + "Cards being shuffled and dealt", + "Pieces being moved across the game board", + "Timer ticking down during a speed round", + "Cheering after a winning play", + "Groans of defeat or frustration", + "Strategic whispering among teammates", + "Chips being stacked or collected" + ], + "cook": [ + "Knife dicing ingredients quickly", + "Dough being kneaded on a countertop", + "Pot lid rattling from steam", + "Vegetables being stirred in a sizzling pan", + "Oven door creaking open for inspection", + "Timer ringing to signal readiness", + "Herbs being chopped on a wooden board", + "Utensils being laid out for service" + ], + "drive": [ + "Engine revving as the vehicle accelerates", + "Wind whooshing past open car windows", + "Seatbelts clicking into their holders", + "Turn signal ticking before a corner", + "Wiper blades swishing in the rain", + "Horn honking in a traffic jam", + "Tires crunching over a gravel road", + "GPS voice announcing upcoming directions" + ], "commute": [ - "Sounds of cars honking", - "Buzz of a busy metro", - "Rhythmic chime of train tracks" - ], - "beach": [ - "Sounds of waves hitting rocks", - "Seagulls in the background", - "Children playing in the sand" - ], - "workout": [ - "Beats of high-energy music", - "Grunts and clanks of gym equipment", - "Breathless intensity of a sprint" - ], - "dinner": [ - "Clatter of cutlery on plates", - "Murmur of dinner conversation", - "Sizzle of food cooking" + "Bus doors hissing open and closed", + "Bike bell dinging on a crowded path", + "Seats shuffling as people find their spots", + "Newspaper pages rustling during the read", + "Subway screeching to a halt at a station", + "Footsteps hurrying to catch public transport", + "Beverage containers being opened and sipped from", + "Cyclists pedaling and gears shifting" + ], + "subway": [ + "Train conductor announcing stations", + "Rails clanking beneath fast-moving cars", + "Turnstile clicking as it turns", + "Passengers chatting or dozing off", + "Card reader beeping as tickets are scanned", + "Sudden silence when a car goes underground", + "Energetic performers in the station", + "Doors sliding shut and warning chimes" + ], + "cycle": [ + "Pedals turning with a rhythmic cadence", + "Chain clinking as it moves the gears", + "Tires humming on smooth pavement", + "Breath puffing from exertion", + "Helmet strap clicking secure", + "Bike frame rattling on uneven surfaces", + "Air pump whooshing to inflate tires", + "Kickstand clanking as it's put down" + ], + "sail": [ + "Sails flapping when catching the wind", + "Ropes creaking as they're pulled tight", + "Waves lapping against the hull", + "Wooden deck groaning with the swell", + "Seagulls calling out overhead", + "Flag whipping at the mast", + "Anchor dropping and chain rattling", + "Captain calling out directions" + ], + "walk": [ + "Footsteps tapping on a sidewalk", + "Leaves crunching in a park", + "Birds chirping along a nature trail", + "Children laughing in nearby playgrounds", + "Gravel scattering on a path", + "Crosswalk signal beeping for pedestrians", + "Jingling of dog collars on a leash", + "Traffic passing on adjacent streets" + ], + "ferry": [ + "Vessel's horn booming over the water", + "Ramp clanging as it lowers for vehicles", + "Waves splashing against the sides", + "Engines rumbling from below deck", + "Passengers' voices mingling on the open deck", + "Seating benches shifting slightly on the deck", + "Life jackets being adjusted with straps and buckles", + "Disembarkment bell signaling arrival" + ], + "taxi": [ + "Radio dispatch voices crackling", + "Meter beeping as it's activated", + "Traffic honking heard through closed windows", + "Upholstery creaking with passenger movement", + "Driver and passenger exchanging pleasantries", + "Car slowing with the sound of brakes", + "Trunk popping open for luggage", + "Receipts printing for the fare" + ], + "trip": [ + "Suitcases zipping and locks clicking", + "Maps folding and unfolding for navigation", + "Camera shutters snapping photos of sights", + "Vehicle doors thudding shut loaded with gear", + "Footsteps echoing in historic corridors", + "Local wildlife calls in natural reserves", + "Chatter of fellow travelers sharing experiences", + "Guidebook pages flipping in search of information" + ], + "vacation": [ + "Resort pools splashing with joyful swimmers", + "Umbrellas flapping in a seaside breeze", + "Luggage wheels rolling across hotel lobbies", + "Distant laughter from late-night revelers", + "Island music floating from a beach bar", + "Boardwalk planks creaking under foot traffic", + "Ice clinking in a glass of a chilled beverage", + "Exchange of greetings in different languages" + ], + "run": [ + "Sneakers thudding against pavement", + "Breathing quick and steady in rhythm", + "Heartbeat pulsing in ears", + "Clothing whispering with each stride", + "Water bottles sloshing mid-jog", + "Birds calling as dawn breaks", + "Timer beeping to signal lap completion", + "Encouragement shouted from a running buddy" + ], + "fitness": [ + "Weights clinking on a rack after a set", + "Treadmills humming in a steady pace", + "Jump ropes slapping the ground quickly", + "Gym mats thumping from dynamic movements", + "Instructor's voice motivating the class", + "Balls bouncing during a sports practice", + "Swim caps gliding through pool water", + "Heavy bag absorbing punches and kicks" ] } \ No newline at end of file From 665589e25da2ac7b9dd2f28f53a4a5100dfe981e Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 01:02:33 -0500 Subject: [PATCH 40/58] given events, get potential prompts randomly --- .../lg_audiogen/keyword_generator.py | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/extensions/lg_audiogen/lg_audiogen/keyword_generator.py b/extensions/lg_audiogen/lg_audiogen/keyword_generator.py index 52d487ad..bfff82ef 100644 --- a/extensions/lg_audiogen/lg_audiogen/keyword_generator.py +++ b/extensions/lg_audiogen/lg_audiogen/keyword_generator.py @@ -1,5 +1,6 @@ import os import json +import random # This is the default keyword dictionary. It is a JSON file that maps keywords to prompts # The CLI will allow the user to input his own dictionary of keywords @@ -14,12 +15,16 @@ except: raise Exception("Could not load keyword dictionary. Please check that the file exists.") -def match_keyword(event_name): - keywords = PROMPT_KEYWORDS.keys() - for keyword in keywords: - if keyword.lower() in event_name.lower(): - return keyword - return None +# for each word in the event name, check if it matches a keyword +# if it does, add one of the random prompt to the list to return +def get_prompts(event_name): + event_name = event_name.lower() + prompt = [] + for word in event_name.split(): + if word in PROMPT_KEYWORDS: + prompt.append(random.choice(PROMPT_KEYWORDS[word])) + return prompt + events = [ {"name": "Commute to work", "duration": 30}, @@ -27,8 +32,10 @@ def match_keyword(event_name): ] for event in events: - keyword = match_keyword(event["name"]) - if keyword: - print(f"Event {event['name']} matches keyword {keyword}.") + prompts = get_prompts(event["name"]) + if prompts: + print(f"Event {event['name']} matches:") + for p in prompts: + print(f"{p}\n") else: print(f"Event {event['name']} does not match any keywords.") \ No newline at end of file From f4a2887cf00ea55dde2f34f63fa2ea5b2782f087 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 01:11:43 -0500 Subject: [PATCH 41/58] Adds the option to generate deterministic queries --- extensions/lg_audiogen/lg_audiogen/keyword_generator.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/extensions/lg_audiogen/lg_audiogen/keyword_generator.py b/extensions/lg_audiogen/lg_audiogen/keyword_generator.py index bfff82ef..af420538 100644 --- a/extensions/lg_audiogen/lg_audiogen/keyword_generator.py +++ b/extensions/lg_audiogen/lg_audiogen/keyword_generator.py @@ -7,6 +7,9 @@ THIS_DIR = os.path.dirname(os.path.abspath(__file__)) KEYWORD_DICT = "/static_inputs/prompt_keywords.json" +# SEED for Deterministic Randomness +DEFAULT_SEED = 42 + # First Try to load KEYWORD_DICT, if it doesn't work, try with THIS_DIR + KEYWORD_DICT try: PROMPT_KEYWORDS = json.load(open(KEYWORD_DICT)) @@ -17,9 +20,11 @@ # for each word in the event name, check if it matches a keyword # if it does, add one of the random prompt to the list to return -def get_prompts(event_name): +# deterministic=True will make the random choice deterministic +def get_prompts(event_name, deterministic=False): event_name = event_name.lower() prompt = [] + random.seed(DEFAULT_SEED if deterministic else None) for word in event_name.split(): if word in PROMPT_KEYWORDS: prompt.append(random.choice(PROMPT_KEYWORDS[word])) From 6ab2189c0bd8b60f815994bb746ec9ceb75ab637 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 01:57:34 -0500 Subject: [PATCH 42/58] introduces gpt functionality to generate prompts --- .../lg_audiogen/lg_audiogen/gpt_utility.py | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 extensions/lg_audiogen/lg_audiogen/gpt_utility.py diff --git a/extensions/lg_audiogen/lg_audiogen/gpt_utility.py b/extensions/lg_audiogen/lg_audiogen/gpt_utility.py new file mode 100644 index 00000000..3da4c881 --- /dev/null +++ b/extensions/lg_audiogen/lg_audiogen/gpt_utility.py @@ -0,0 +1,45 @@ +import os +import json +from openai import OpenAI +from dotenv import load_dotenv +load_dotenv() + +client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) + +def query_gpt(event_list, deterministic=False): + response = client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[ + { + "role": "system", + "content": "Creative and deterministic assistant in generating sound prompts from a given list of events. Ignore proper names and focus on defined sounds." + }, + { + "role": "user", + "content": "[\"Commute to work\", \"Walk by the beach\"]" + }, + { + "role": "assistant", + "content": "[\"Cars honking in traffic\", \"Footsteps tapping on the sand with waves in the background\"]" + }, + { + "role": "user", + "content": "[\"Virtual Meeting with Nathan\", \"Beer and Chips with Friends\"]" + }, + { + "role": "assistant", + "content": "[\"Keyboard typing and mouse clicks\", \"Laughter and the clinking of glasses, crunching of chips\"]" + }, + ], + temperature=0 if deterministic else 1, + max_tokens=1101, + top_p=1, + frequency_penalty=0, + presence_penalty=0 + ) + response = json.loads(response.choices[0].message.content) + return response + +event_list = ["Commute to work", "Walk by the beach"] +response = query_gpt(event_list) +print(response) \ No newline at end of file From 49abd30d877caf63864cf8cbdbe37d98b430900f Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 01:59:43 -0500 Subject: [PATCH 43/58] adds .env format for gpt generation --- extensions/lg_audiogen/lg_audiogen/sample.env | 1 + 1 file changed, 1 insertion(+) create mode 100644 extensions/lg_audiogen/lg_audiogen/sample.env diff --git a/extensions/lg_audiogen/lg_audiogen/sample.env b/extensions/lg_audiogen/lg_audiogen/sample.env new file mode 100644 index 00000000..f81c4a84 --- /dev/null +++ b/extensions/lg_audiogen/lg_audiogen/sample.env @@ -0,0 +1 @@ +OPENAI_API_KEY = "" \ No newline at end of file From 80e13506e6a6adea0f4b9ce4e25b1451e3145abf Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 02:05:23 -0500 Subject: [PATCH 44/58] adds openai module to the project --- extensions/lg_audiogen/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/extensions/lg_audiogen/setup.py b/extensions/lg_audiogen/setup.py index 220198d5..7e64e0f7 100644 --- a/extensions/lg_audiogen/setup.py +++ b/extensions/lg_audiogen/setup.py @@ -14,6 +14,7 @@ "torch>=2.1.0", "torchaudio>=2.1.0", "audiocraft==1.1.0", + "openai==1.3.6" ], entry_points=''' [console_scripts] From da181f24495a9fbfc7f2d6192476d8167326f980 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 02:30:52 -0500 Subject: [PATCH 45/58] adds icalendar module to the package setup --- extensions/lg_audiogen/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/extensions/lg_audiogen/setup.py b/extensions/lg_audiogen/setup.py index 7e64e0f7..93e97c5f 100644 --- a/extensions/lg_audiogen/setup.py +++ b/extensions/lg_audiogen/setup.py @@ -14,6 +14,7 @@ "torch>=2.1.0", "torchaudio>=2.1.0", "audiocraft==1.1.0", + "icalendar==5.0.11", "openai==1.3.6" ], entry_points=''' From de9edf97b519c977e78ee194a11ba6d5c1fd5454 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 02:58:38 -0500 Subject: [PATCH 46/58] Adds cli opts for activities, gpt, date, random --- extensions/lg_audiogen/lg_audiogen/main.py | 37 ++++++++++++++++++++-- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/extensions/lg_audiogen/lg_audiogen/main.py b/extensions/lg_audiogen/lg_audiogen/main.py index 6e738e23..b3c9e47c 100644 --- a/extensions/lg_audiogen/lg_audiogen/main.py +++ b/extensions/lg_audiogen/lg_audiogen/main.py @@ -1,10 +1,13 @@ import click import torch +import datetime from audiocraft.models import AudioGen from audiocraft.data.audio import audio_write +from lg_audiogen.calendar_reader import calendar_to_dictionary, get_events_between_dates DEFAULT_AUDIOGEN_MODEL = 'facebook/audiogen-medium' DEFAULT_AUDIO_DURATION = 5 +DEFAULT_DATE = datetime.datetime.now().strftime('%Y-%m-%d') @click.command() @click.argument('description', nargs=-1, required=False) @@ -12,11 +15,17 @@ @click.option('--model', '-m', default=DEFAULT_AUDIOGEN_MODEL, help='Name of the Audiocraft AudioGen model to use.') @click.option('--output', '-o', help='Name of the output file.') @click.option('--batch', '-b', type=click.Path(), help='File name for batch audio description.') -def parse_arguments(description, duration, model, output, batch): +@click.option('--activities', '-a', help='Comma separated string or .ics file containing activities.') +@click.option('--gpt', is_flag=True, help='Enable GPT model for activities.') +@click.option('--deterministic', is_flag=True, help='Enable deterministic generation.') +@click.option('--dates', '-dt', default=DEFAULT_DATE, help='Date in the format \'YYYY-MM-DD\' or as a range: \'YYYY-MM-DD,YYYY-MM-DD\'.') +def parse_arguments(description, duration, model, output, batch, activities, gpt, deterministic, dates): """ Generates audio from description using Audiocraft's AudioGen. """ - if batch: + if activities: + handle_activities(activities, gpt, deterministic, dates) + elif batch: try: with open(batch, mode='r', encoding='utf-8') as f: descriptions = [line.strip() for line in f.readlines()] @@ -26,7 +35,29 @@ def parse_arguments(description, duration, model, output, batch): if not description: raise click.BadParameter("Description argument is required when not using --batch.") descriptions = [' '.join(description)] - run_audio_generation(descriptions, duration, model, output) + #run_audio_generation(descriptions, duration, model, output) + +def check_dates_format(dates): + dates = dates.split(',') + if len(dates) > 2: + raise click.BadParameter("Dates must be in the format \'YYYY-MM-DD\' or as a range: \'YYYY-MM-DD,YYYY-MM-DD\'.") + for date in dates: + try: + datetime.datetime.strptime(date, '%Y-%m-%d') + except ValueError: + raise click.BadParameter("Dates must be in the format \'YYYY-MM-DD\' or as a range: \'YYYY-MM-DD,YYYY-MM-DD\'.") + return dates + +def handle_activities(activities, gpt, deterministic, dates): + if activities.endswith('.ics'): + dates = check_dates_format(dates) + calendar_events = calendar_to_dictionary(activities) + # -1 trick to get the last element of the list (end date or single date) + sorted_events = get_events_between_dates(calendar_events, dates[0], dates[-1]) + print(sorted_events) + else: + activities = activities.split(',') + print(activities) def run_audio_generation(descriptions, duration, model_name, output): """ From 2b8b2880a0b0d2f9bcb97a0e774b473cdc7f3866 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 04:00:40 -0500 Subject: [PATCH 47/58] Converting activities to sounds complete --- extensions/lg_audiogen/lg_audiogen/main.py | 23 +++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/extensions/lg_audiogen/lg_audiogen/main.py b/extensions/lg_audiogen/lg_audiogen/main.py index b3c9e47c..dbc7069b 100644 --- a/extensions/lg_audiogen/lg_audiogen/main.py +++ b/extensions/lg_audiogen/lg_audiogen/main.py @@ -4,6 +4,8 @@ from audiocraft.models import AudioGen from audiocraft.data.audio import audio_write from lg_audiogen.calendar_reader import calendar_to_dictionary, get_events_between_dates +from lg_audiogen.gpt_utility import query_gpt +from lg_audiogen.keyword_generator import get_prompts DEFAULT_AUDIOGEN_MODEL = 'facebook/audiogen-medium' DEFAULT_AUDIO_DURATION = 5 @@ -24,7 +26,7 @@ def parse_arguments(description, duration, model, output, batch, activities, gpt Generates audio from description using Audiocraft's AudioGen. """ if activities: - handle_activities(activities, gpt, deterministic, dates) + descriptions, output = handle_activities(activities, gpt, deterministic, dates) elif batch: try: with open(batch, mode='r', encoding='utf-8') as f: @@ -35,7 +37,7 @@ def parse_arguments(description, duration, model, output, batch, activities, gpt if not description: raise click.BadParameter("Description argument is required when not using --batch.") descriptions = [' '.join(description)] - #run_audio_generation(descriptions, duration, model, output) + run_audio_generation(descriptions, duration, model, output) def check_dates_format(dates): dates = dates.split(',') @@ -54,10 +56,19 @@ def handle_activities(activities, gpt, deterministic, dates): calendar_events = calendar_to_dictionary(activities) # -1 trick to get the last element of the list (end date or single date) sorted_events = get_events_between_dates(calendar_events, dates[0], dates[-1]) - print(sorted_events) + # build a list of event name strings if event has a name + activities = [] + for each_date in sorted_events: + for each_event in sorted_events[each_date]: + if each_event['name']: + activities.append(each_event['name']) else: activities = activities.split(',') - print(activities) + if gpt: + response = query_gpt(activities, deterministic) + else: + response = get_prompts(activities, deterministic) + return response, activities def run_audio_generation(descriptions, duration, model_name, output): """ @@ -76,11 +87,13 @@ def run_audio_generation(descriptions, duration, model_name, output): # Generate audio from the descriptions wav = model.generate(descriptions) - batch_output = output + batch_output = output if type(output) == str else '' # Save the generated audios. for idx, one_wav in enumerate(wav): # Will save under {output}{idx}.wav, with loudness normalization at -14 db LUFS. if not output: batch_output = descriptions[idx].replace(' ', '_') + if type(output) == list and len(output) == len(descriptions): + batch_output = output[idx] audio_write(f'{batch_output}{idx}', one_wav.cpu(), model.sample_rate, strategy="loudness", loudness_compressor=True) From 62a15bd48ef75ce7ef947e3a2d019b6256da9f37 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 04:00:56 -0500 Subject: [PATCH 48/58] adds dotenv to setup --- extensions/lg_audiogen/setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/extensions/lg_audiogen/setup.py b/extensions/lg_audiogen/setup.py index 93e97c5f..34bb9cd6 100644 --- a/extensions/lg_audiogen/setup.py +++ b/extensions/lg_audiogen/setup.py @@ -15,7 +15,8 @@ "torchaudio>=2.1.0", "audiocraft==1.1.0", "icalendar==5.0.11", - "openai==1.3.6" + "openai==1.3.6", + "python-dotenv==1.0.0" ], entry_points=''' [console_scripts] From e34726ec6e0f804639450fd53c8fd3783ed00cab Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 04:02:19 -0500 Subject: [PATCH 49/58] fix context query and code pruning --- .../lg_audiogen/lg_audiogen/calendar_reader.py | 10 +--------- extensions/lg_audiogen/lg_audiogen/gpt_utility.py | 14 +++++++++----- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/extensions/lg_audiogen/lg_audiogen/calendar_reader.py b/extensions/lg_audiogen/lg_audiogen/calendar_reader.py index f56de51d..ebedad16 100644 --- a/extensions/lg_audiogen/lg_audiogen/calendar_reader.py +++ b/extensions/lg_audiogen/lg_audiogen/calendar_reader.py @@ -94,12 +94,4 @@ def get_events_between_dates(calendar_events, start_date_str, end_date_str): # Sort events for the current date by timestamp key 'ts' in ascending order events_between_dates[date_str] = sorted(calendar_events[date_str], key=lambda event: event['ts']) current_date += timedelta(days=1) - return events_between_dates - -calendar_events = calendar_to_dictionary('calendar.ics') -for date, events in sorted(calendar_events.items()): - print(f"Date: {date}") - for event in events: - print(f" Event: {event['name']}") - print(f" Duration: {event['duration']} minutes") - print() \ No newline at end of file + return events_between_dates \ No newline at end of file diff --git a/extensions/lg_audiogen/lg_audiogen/gpt_utility.py b/extensions/lg_audiogen/lg_audiogen/gpt_utility.py index 3da4c881..7c2c3a71 100644 --- a/extensions/lg_audiogen/lg_audiogen/gpt_utility.py +++ b/extensions/lg_audiogen/lg_audiogen/gpt_utility.py @@ -12,7 +12,7 @@ def query_gpt(event_list, deterministic=False): messages=[ { "role": "system", - "content": "Creative and deterministic assistant in generating sound prompts from a given list of events. Ignore proper names and focus on defined sounds." + "content": "Creative and deterministic assistant in generating sound prompts from a given list of events. Ignore proper names and focus on defined sounds. Give a list output of the same length as the input." }, { "role": "user", @@ -30,6 +30,14 @@ def query_gpt(event_list, deterministic=False): "role": "assistant", "content": "[\"Keyboard typing and mouse clicks\", \"Laughter and the clinking of glasses, crunching of chips\"]" }, + { + "role": "user", + "content": "[\"Meeting with Joe\"]" + }, + { + "role": "assistant", + "content": "[\"Keyboard typing and mouse clicks with chatter in the background\"]" + }, ], temperature=0 if deterministic else 1, max_tokens=1101, @@ -39,7 +47,3 @@ def query_gpt(event_list, deterministic=False): ) response = json.loads(response.choices[0].message.content) return response - -event_list = ["Commute to work", "Walk by the beach"] -response = query_gpt(event_list) -print(response) \ No newline at end of file From 8a4782b253b4cc844c64ac4121cb93604384e644 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 04:02:45 -0500 Subject: [PATCH 50/58] reappropriate keyword generator --- .../lg_audiogen/keyword_generator.py | 42 +++++++++---------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/extensions/lg_audiogen/lg_audiogen/keyword_generator.py b/extensions/lg_audiogen/lg_audiogen/keyword_generator.py index af420538..c6ceeafd 100644 --- a/extensions/lg_audiogen/lg_audiogen/keyword_generator.py +++ b/extensions/lg_audiogen/lg_audiogen/keyword_generator.py @@ -21,26 +21,22 @@ # for each word in the event name, check if it matches a keyword # if it does, add one of the random prompt to the list to return # deterministic=True will make the random choice deterministic -def get_prompts(event_name, deterministic=False): - event_name = event_name.lower() - prompt = [] - random.seed(DEFAULT_SEED if deterministic else None) - for word in event_name.split(): - if word in PROMPT_KEYWORDS: - prompt.append(random.choice(PROMPT_KEYWORDS[word])) - return prompt - - -events = [ - {"name": "Commute to work", "duration": 30}, - {"name": "Going to the beach", "duration": 120}, -] - -for event in events: - prompts = get_prompts(event["name"]) - if prompts: - print(f"Event {event['name']} matches:") - for p in prompts: - print(f"{p}\n") - else: - print(f"Event {event['name']} does not match any keywords.") \ No newline at end of file +def get_prompts(event_names, deterministic=False): + if PROMPT_KEYWORDS and len(PROMPT_KEYWORDS) == 0: + raise Exception("Keyword dictionary is empty. Please check that the file is not empty.") + full_prompt = [] + for event in event_names: + event_name = event.lower() + prompt = [] + random.seed(DEFAULT_SEED if deterministic else None) + for word in event.split(): + if word in PROMPT_KEYWORDS: + prompt.append(random.choice(PROMPT_KEYWORDS[word])) + if len(prompt) > 1: + prompt = ' combined with '.join(prompt) + full_prompt.append(prompt) + elif len(prompt) == 1: + full_prompt.append(prompt[0]) + else: + full_prompt.append(event_name) # if no prompt is found, just use the event name + return full_prompt \ No newline at end of file From d535a362da62887efb35e6bc6866772ba96a7691 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 04:02:59 -0500 Subject: [PATCH 51/58] Fix sample prompt --- .../lg_audiogen/lg_audiogen/static_inputs/prompt_keywords.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/lg_audiogen/lg_audiogen/static_inputs/prompt_keywords.json b/extensions/lg_audiogen/lg_audiogen/static_inputs/prompt_keywords.json index 286738c0..b6399823 100644 --- a/extensions/lg_audiogen/lg_audiogen/static_inputs/prompt_keywords.json +++ b/extensions/lg_audiogen/lg_audiogen/static_inputs/prompt_keywords.json @@ -426,7 +426,7 @@ "Newspaper pages rustling during the read", "Subway screeching to a halt at a station", "Footsteps hurrying to catch public transport", - "Beverage containers being opened and sipped from", + "Cars honking in traffic", "Cyclists pedaling and gears shifting" ], "subway": [ From 3779996b483728c9ff0a08ed70a36f0950897af1 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 04:14:50 -0500 Subject: [PATCH 52/58] adds docstrings --- .../lg_audiogen/lg_audiogen/gpt_utility.py | 8 ++++++++ .../lg_audiogen/keyword_generator.py | 8 ++++++++ extensions/lg_audiogen/lg_audiogen/main.py | 17 +++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/extensions/lg_audiogen/lg_audiogen/gpt_utility.py b/extensions/lg_audiogen/lg_audiogen/gpt_utility.py index 7c2c3a71..92988a6d 100644 --- a/extensions/lg_audiogen/lg_audiogen/gpt_utility.py +++ b/extensions/lg_audiogen/lg_audiogen/gpt_utility.py @@ -7,6 +7,14 @@ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) def query_gpt(event_list, deterministic=False): + """ + Queries GPT-3.5 to generate a response based on the given event list. + + @param event_list: The list of events to be used as input. + @param deterministic: Flag indicating whether to use deterministic mode for GPT response generation. + + @return: The response generated by GPT-3.5 as a list of strings. + """ response = client.chat.completions.create( model="gpt-3.5-turbo", messages=[ diff --git a/extensions/lg_audiogen/lg_audiogen/keyword_generator.py b/extensions/lg_audiogen/lg_audiogen/keyword_generator.py index c6ceeafd..94cfc1ad 100644 --- a/extensions/lg_audiogen/lg_audiogen/keyword_generator.py +++ b/extensions/lg_audiogen/lg_audiogen/keyword_generator.py @@ -22,6 +22,14 @@ # if it does, add one of the random prompt to the list to return # deterministic=True will make the random choice deterministic def get_prompts(event_names, deterministic=False): + """ + Creates a prompt for each event name by matching keywords + in the event name to prompts in the keyword dictionary. + + @param event_names: A list of event names + @param deterministic: A boolean to make the random choice deterministic + @return: A list of prompts for each event name + """ if PROMPT_KEYWORDS and len(PROMPT_KEYWORDS) == 0: raise Exception("Keyword dictionary is empty. Please check that the file is not empty.") full_prompt = [] diff --git a/extensions/lg_audiogen/lg_audiogen/main.py b/extensions/lg_audiogen/lg_audiogen/main.py index dbc7069b..12567c6c 100644 --- a/extensions/lg_audiogen/lg_audiogen/main.py +++ b/extensions/lg_audiogen/lg_audiogen/main.py @@ -40,6 +40,13 @@ def parse_arguments(description, duration, model, output, batch, activities, gpt run_audio_generation(descriptions, duration, model, output) def check_dates_format(dates): + """ + Checks if the dates are in the correct format. + + @param dates: The dates to be checked. If a string is provided, it will be split by commas. + + @return: A list of dates. + """ dates = dates.split(',') if len(dates) > 2: raise click.BadParameter("Dates must be in the format \'YYYY-MM-DD\' or as a range: \'YYYY-MM-DD,YYYY-MM-DD\'.") @@ -51,6 +58,16 @@ def check_dates_format(dates): return dates def handle_activities(activities, gpt, deterministic, dates): + """ + Handles the activities based on the given parameters. + + @param activities: The activities to be handled. If a string is provided, it will be split by commas. + @param gpt: Flag indicating whether to use GPT for generating response. + @param deterministic: Flag indicating whether to use deterministic mode for GPT response generation. + @param dates: The dates to filter the activities. If a string is provided, it should be in the format 'YYYY-MM-DD'. + + @return: A tuple containing the response generated and the list of activities. + """ if activities.endswith('.ics'): dates = check_dates_format(dates) calendar_events = calendar_to_dictionary(activities) From b73f1cd4ee87ce9b404dd26e5f24a75b2aeac9f1 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 04:15:22 -0500 Subject: [PATCH 53/58] Removes unused code, Adds docstrings --- .../lg_audiogen/calendar_reader.py | 70 ++++++++++++++++--- 1 file changed, 59 insertions(+), 11 deletions(-) diff --git a/extensions/lg_audiogen/lg_audiogen/calendar_reader.py b/extensions/lg_audiogen/lg_audiogen/calendar_reader.py index ebedad16..fe00f909 100644 --- a/extensions/lg_audiogen/lg_audiogen/calendar_reader.py +++ b/extensions/lg_audiogen/lg_audiogen/calendar_reader.py @@ -2,28 +2,57 @@ from datetime import datetime, date, timedelta, timezone from dateutil.rrule import rrulestr -# Inclusive [2021, 2021] MIN_YEAR = datetime.now().year MAX_YEAR = MIN_YEAR -# We only to get the events from the minimum year to the max year def is_within_limit(dt): + """ + Checks if the datetime is within the limit. + + @param dt: The datetime to check. + + @return: True if the datetime is within the limit, False otherwise. + """ return MIN_YEAR <= dt.year <= MAX_YEAR def convert_to_utc(dt): + """ + Converts a datetime with timezone info to UTC. + + @param dt: The datetime to convert. + + @return: The datetime converted to UTC. + """ if isinstance(dt, datetime) and dt.tzinfo is not None and dt.tzinfo.utcoffset(dt) is not None: # Convert offset-aware datetime to UTC return dt.astimezone(timezone.utc) return dt def datetime_to_timestamp(dt): + """ + Converts a datetime or date to a timestamp. + + @param dt: The datetime or date to convert. + + @return: The timestamp. + """ if isinstance(dt, datetime): return dt.timestamp() elif isinstance(dt, date): return datetime.combine(dt, datetime.min.time(), tzinfo=timezone.utc).timestamp() raise TypeError("Expected datetime.datetime or datetime.date") -def populate_events(component, start_dt, calendar_events, summary, duration): +def populate_events(start_dt, calendar_events, summary, duration): + """ + Populates the calendar_events dictionary with the events. + + @param start_dt: The start datetime. + @param calendar_events: The dictionary of events. + @param summary: The title/summary of the event. + @param duration: The duration of the event. + + @return: 1 if the event was added, 0 otherwise. + """ if not is_within_limit(start_dt): return 0 @@ -43,14 +72,30 @@ def populate_events(component, start_dt, calendar_events, summary, duration): return 1 def populate_recurring_events(component, start_dt, calendar_events, summary, duration): + """ + Populates the calendar_events dictionary with the recurring events. + + @param component: The component to populate the events from. + @param start_dt: The start datetime. + @param calendar_events: The dictionary of events. + @param summary: The title/summary of the event. + @param duration: The duration of the event. + """ # rr will give us a generator rr = rrulestr(component.get('rrule').to_ical().decode('utf-8'), dtstart=start_dt) for dt in rr: - if populate_events(component, dt, calendar_events, summary, duration) == 0: + if populate_events(dt, calendar_events, summary, duration) == 0: return # short circuit if we're out of the range def calendar_to_dictionary(filepath): + """ + Given a filepath to a calendar file, returns a dictionary of events. + + @param filepath: The filepath to the calendar file. + + @return: A dictionary of events from the .ics file. + """ # Read the user's calendar file and parse it into an icalendar object with open(filepath, 'r', encoding='utf-8') as f: gcal = Calendar.from_ical(f.read()) @@ -71,17 +116,20 @@ def calendar_to_dictionary(filepath): if 'rrule' in component: populate_recurring_events(component, start_dt, calendar_events, summary, duration) else: - populate_events(component, start_dt, calendar_events, summary, duration) + populate_events(start_dt, calendar_events, summary, duration) return calendar_events -def get_events_for_specific_date(calendar_events, specific_date_str): - # Assumes specific_date_str is in YYYY-MM-DD format - day_events = calendar_events.get(specific_date_str, []) - # Sort events by timestamp key 'ts' in ascending order - return sorted(day_events, key=lambda event: event['ts']) - def get_events_between_dates(calendar_events, start_date_str, end_date_str): + """ + Given a dictionary of events, returns the events between two dates [start_date, end_date]. + + @param calendar_events: The dictionary of events. + @param start_date_str: The start date. + @param end_date_str: The end date. + + @return: The events between the two dates. + """ # Assumes start_date_str and end_date_str are in YYYY-MM-DD format and start_date <= end_date start_date = datetime.strptime(start_date_str, '%Y-%m-%d').date() end_date = datetime.strptime(end_date_str, '%Y-%m-%d').date() From ee64dbc229b0818d86da2162ce028d1514dcb22d Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 04:54:15 -0500 Subject: [PATCH 54/58] Improves GPT model + context --- .../lg_audiogen/lg_audiogen/gpt_utility.py | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/extensions/lg_audiogen/lg_audiogen/gpt_utility.py b/extensions/lg_audiogen/lg_audiogen/gpt_utility.py index 92988a6d..e22a24a8 100644 --- a/extensions/lg_audiogen/lg_audiogen/gpt_utility.py +++ b/extensions/lg_audiogen/lg_audiogen/gpt_utility.py @@ -16,11 +16,11 @@ def query_gpt(event_list, deterministic=False): @return: The response generated by GPT-3.5 as a list of strings. """ response = client.chat.completions.create( - model="gpt-3.5-turbo", + model="gpt-4-1106-preview", messages=[ { "role": "system", - "content": "Creative and deterministic assistant in generating sound prompts from a given list of events. Ignore proper names and focus on defined sounds. Give a list output of the same length as the input." + "content": "Creative assistant in generating sound prompts from a given list of events. Outputs a json object of sounds. Size of the output should be the same as the input" }, { "role": "user", @@ -28,7 +28,7 @@ def query_gpt(event_list, deterministic=False): }, { "role": "assistant", - "content": "[\"Cars honking in traffic\", \"Footsteps tapping on the sand with waves in the background\"]" + "content": "{sounds: [\"Cars honking in traffic\", \"Footsteps tapping on the sand with waves in the background\"]}" }, { "role": "user", @@ -36,7 +36,7 @@ def query_gpt(event_list, deterministic=False): }, { "role": "assistant", - "content": "[\"Keyboard typing and mouse clicks\", \"Laughter and the clinking of glasses, crunching of chips\"]" + "content": "{sounds: [\"Keyboard typing and mouse clicks\", \"Laughter and the clinking of glasses, crunching of chips\"]}" }, { "role": "user", @@ -44,14 +44,28 @@ def query_gpt(event_list, deterministic=False): }, { "role": "assistant", - "content": "[\"Keyboard typing and mouse clicks with chatter in the background\"]" + "content": "{sounds: [\"Keyboard typing and mouse clicks with chatter in the background\"]}" }, + { + "role": "user", + "content": "[\"'23.FAL.B.1 Pod Meeting - MLH Fellowship\", \"Oscar Mier and Nathan Kurelo Wilk\", \"Monday MS FinTech Classes\", \"Tuesday MS FinTech Classes\", \"23.FAL.B.1 Pod Meeting - MLH Fellowship\", \"Wednesday MS FinTech Classes\"]" + }, + { + "role": "assistant", + "content": "{sounds: [\"Mic feedback, low murmur of voices discussing on a conference call\",\"Ambient room noise\",\"Turning pages, lecturer speaking faintly in the background\",\"Turning pages, lecturer speaking faintly in the background\",\"Mic feedback, low murmur of voices discussing on a conference call\",\"Turning pages, lecturer speaking faintly in the background\"]}" + }, + { + "role": "user", + "content": json.dumps(event_list) + } ], temperature=0 if deterministic else 1, max_tokens=1101, top_p=1, frequency_penalty=0, - presence_penalty=0 + presence_penalty=0, + response_format={ "type": "json_object" } ) - response = json.loads(response.choices[0].message.content) - return response + response = json.loads(response.choices[0].message.content).get("sounds") + print("GPT Response", response) + return response \ No newline at end of file From 5c11458b2f26866599da99d47163ea0944295ed0 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 05:01:30 -0500 Subject: [PATCH 55/58] downgrade to cheaper model --- extensions/lg_audiogen/lg_audiogen/gpt_utility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/lg_audiogen/lg_audiogen/gpt_utility.py b/extensions/lg_audiogen/lg_audiogen/gpt_utility.py index e22a24a8..6f61d431 100644 --- a/extensions/lg_audiogen/lg_audiogen/gpt_utility.py +++ b/extensions/lg_audiogen/lg_audiogen/gpt_utility.py @@ -16,7 +16,7 @@ def query_gpt(event_list, deterministic=False): @return: The response generated by GPT-3.5 as a list of strings. """ response = client.chat.completions.create( - model="gpt-4-1106-preview", + model="gpt-3.5-turbo-1106", messages=[ { "role": "system", From ceaa86fc51972e577b732852a9d6d407d4a8932f Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 12:25:28 -0500 Subject: [PATCH 56/58] Adds Test Case to check activity functionality --- extensions/lg_audiogen/lg_audiogen/main.py | 1 + extensions/lg_audiogen/tests/test_main.py | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/extensions/lg_audiogen/lg_audiogen/main.py b/extensions/lg_audiogen/lg_audiogen/main.py index 12567c6c..b563a587 100644 --- a/extensions/lg_audiogen/lg_audiogen/main.py +++ b/extensions/lg_audiogen/lg_audiogen/main.py @@ -85,6 +85,7 @@ def handle_activities(activities, gpt, deterministic, dates): response = query_gpt(activities, deterministic) else: response = get_prompts(activities, deterministic) + activities = [activity.replace(' ', '_') for activity in activities] return response, activities def run_audio_generation(descriptions, duration, model_name, output): diff --git a/extensions/lg_audiogen/tests/test_main.py b/extensions/lg_audiogen/tests/test_main.py index c398fcaf..8159fe74 100644 --- a/extensions/lg_audiogen/tests/test_main.py +++ b/extensions/lg_audiogen/tests/test_main.py @@ -11,3 +11,16 @@ def test_single_description(): # Assert that the output file was created assert os.path.exists("dog_barking0.wav"), "Output file dog_barking0.wav was not created" os.remove("dog_barking0.wav") + +def test_activity_to_sound(): + ''' + Tests output with a single activity + ''' + # Run the script with an example activity + subprocess.run(["lg_audiogen", "-a", "meeting with nathan"], + capture_output=True, text=True, check=False) + # print the ls command output + print(subprocess.run(["ls"], capture_output=True, text=True, check=False)) + # Assert that the output file was created + assert os.path.exists("meeting_with_nathan0.wav"), "Output file meeting_with_nathan0.wav was not created" + os.remove("meeting_with_nathan0.wav") From af23faa7b773a78d84b80be2424b47d3a68c65f3 Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 12:26:32 -0500 Subject: [PATCH 57/58] Adds ReadME with new functionalities & use cases --- extensions/lg_audiogen/README.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/extensions/lg_audiogen/README.md b/extensions/lg_audiogen/README.md index dad65c03..6c89ff2e 100644 --- a/extensions/lg_audiogen/README.md +++ b/extensions/lg_audiogen/README.md @@ -1,6 +1,6 @@ # Audiogen -Audiogen is a Python command-line tool that uses models from Audiocraft's AudioGen to generate audio from specified descriptions. This tool can generate a single piece of audio based on a specific description or multiple pieces of audio based on a batch file containing multiple descriptions. +Audiogen is a Python command-line tool that uses models from Audiocraft's AudioGen to generate audio from specified descriptions. This tool can generate a single piece of audio based on a specific description, multiple pieces of audio based on a batch file containing multiple descriptions, or based on activities from a string or an `.ics` calendar file. ## Features @@ -8,6 +8,10 @@ Audiogen is a Python command-line tool that uses models from Audiocraft's AudioG * Ability to generate audio based on a batch file. * Ability to specify the model to be used for the audio generation. * Ability to set the output file name. +* Ability to generate audio based on daily acitivites from a comma-separated string or a `.ics` calendar file. +* Ability to integrate with GPT models to enhance activity descriptions. +* Ability to enable pseudo-deterministic activity prompts +* Ability to specify a date or a range of dates to get events from the `.ics` calendar file. ## Setup @@ -30,6 +34,10 @@ The CLI usage for Audiogen is `lg_audiogen [OPTIONS] [DESCRIPTION]...`. * `model, -m`: name of the Audiocraft AudioGen model to use, default is 'facebook/audiogen-medium'. * `output, -o`: name of the output file. * `batch`: file name for batch audio description. +* `activities, -a`: comma-separated string or `.ics` calendar file containing events. +* `gpt`: New: flag to enable GPT model for activities description enhancement. +* `deterministic`: New: flag to enable deterministic generation. +* `dates, -dt`: New: date in the format 'YYYY-MM-DD' or as a range 'YYYY-MM-DD,YYYY-MM-DD'. ### Example @@ -41,8 +49,14 @@ lg_audiogen -d 5 -m 'facebook/audiogen-medium' -o 'my_output' 'dog barking' lg_audiogen 'dog barking' lg_audiogen -b 'batch.txt' + +lg_audiogen -a 'meeting with nathan, lunch with friends' -gpt -deterministic + +lg_audiogen -a "calendar.ics" -gpt -dt '2023-11-29,2023-12-01' ``` +**Note:** for GPT usage, create a `.env` file with the same format as the `sample.env` file provided. + ### Batch File Format The batch file should contain one description per line. The descriptions should be in the same format as the descriptions used in the command-line interface. From 2297f2e69254d22c1ff12b3351e2722afe6edf6e Mon Sep 17 00:00:00 2001 From: Nate8888 Date: Fri, 1 Dec 2023 12:37:07 -0500 Subject: [PATCH 58/58] Fix activities in the ReadME --- extensions/lg_audiogen/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/lg_audiogen/README.md b/extensions/lg_audiogen/README.md index 6c89ff2e..055ffc48 100644 --- a/extensions/lg_audiogen/README.md +++ b/extensions/lg_audiogen/README.md @@ -8,7 +8,7 @@ Audiogen is a Python command-line tool that uses models from Audiocraft's AudioG * Ability to generate audio based on a batch file. * Ability to specify the model to be used for the audio generation. * Ability to set the output file name. -* Ability to generate audio based on daily acitivites from a comma-separated string or a `.ics` calendar file. +* Ability to generate audio based on daily activities from a comma-separated string or a `.ics` calendar file. * Ability to integrate with GPT models to enhance activity descriptions. * Ability to enable pseudo-deterministic activity prompts * Ability to specify a date or a range of dates to get events from the `.ics` calendar file.