aume · jkranabetter · Sep 23, 2021 · Sep 23, 2021 · Sep 23, 2021 · Sep 23, 2021
diff --git a/.gitignore b/.gitignore
@@ -1,105 +1,5 @@
-# Byte-compiled / optimized / DLL files
+# Python Cache
 __pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-pip-wheel-metadata/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# PyInstaller
-#  Usually these files are written by a python script from a template
-#  before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-.pytest_cache/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
-.python-version
-
-# pipenv
-#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-#   However, in case of collaboration, if having platform-specific dependencies or dependencies
-#   having no cross-platform support, pipenv may install dependencies that don't work, or not
-#   install all needed dependencies.
-#Pipfile.lock
-
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
 
 # Environments
 .env
@@ -109,21 +9,3 @@ venv/
 ENV/
 env.bak/
 venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2021 Miles Thorogood
+Copyright (c) 2022 Miles Thorogood, Joshua Kranabetter
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/README.md b/README.md
@@ -1,38 +1,43 @@
-# BFsegmenter
+# BFSegmenter
 
-## Requirements:
+The BFSegmenter segments audio files and classifies each segment as background, foreground, or background with foreground. Additionally, the affect of each segment is predicted on scales of valence and arousal.
 
-sklearn
+![Pipeline](/images/pipeline.png)
 
-numpy
+Sound designers and soundscape composers manually segment audio files into building blocks for use in a composition. **Machine learning (ridge regression)** is used to classify segments in an audio file automatically. The model has an **83.0% true positive classification rate**.
 
-matplotlib
+Russell’s model
+suggests all emotions are distributed in a circular space (https://psycnet.apa.org/record/1981-25062-001).
+High levels of valence correspond to pleasant sounds while
+low valence levels correspond to unpleasant sounds. Further, high levels of arousal correspond to exciting sounds while low levels correspond to calming sounds. **Levels of valence and arousal are quantified using machine learning for emotion prediction (random forest regression)**. The emotion prediction models use a subset of extracted features to predict valence and arousal on a scale from -1 to 1 for each segment in an audio file.
 
-sqlite3
+![Affect Accuracy](/images/affect_accuracy.png)
 
-Python 2.7 (porting to Python 3 is a task)
+An example implementation of the segmenter is in *extract_audacity_labels.py*.
 
-YAAFE https://github.com/Yaafe/Yaafe
+## Segment format
 
-pydub https://github.com/jiaaro/pydub
+    bf type
+    duration
+    start
+    end
+    features
+    arousal
+    valence
+    bf probabilities
 
-## TODO:
+## Dependencies
+Essentia - an open-source library of tools for audio and music analysis, description, and synthesis. https://essentia.upf.edu/
 
-port to Python 3
+Scikit-learn - a free software machine learning library for the Python programming language.
 
-corpusAuquireDir.py
+For full requirements, check *requirements.txt*.
 
-  svnsegmenter.py
+## Authors
 
-  bf_classifier.py
+ - Miles Thorogood
+ - Joshua Kranabetter
 
-  affect_predictor.py
+## License
 
-  yaafeEngine.py
-
-## Running
-python corpusAquireDir.py /path/to/audio/files
-
-audio files are aif or wav
-
-segments the files in the directory and puts the segs into SegmentedCorpus
+This project is licensed under the MIT License - see the *LICENSE* file for details.
diff --git a/affect_predictor.py b/affect_predictor.py
@@ -1,66 +1,51 @@
-
-#!/usr/bin/env python
-
 import numpy as np
-import csv
-import matplotlib.pyplot as plt
-from sklearn import linear_model
-import math
+from sklearn.ensemble import RandomForestRegressor
 
 class AffectPredict:
-    """docstring for AffectPredict"""
-
+    '''
+        Emotion prediction models for valence and arousal.
+    '''
     def __init__(self):
+        # masks to select features
+        self.AROUSAL_MASK = [11, 27, 29, 30, 33, 34, 48, 80, 89, 98, 110, 117, 118, 127, 128, 131, 133, 146, 166, 171, 203, 204, 219, 221, 236, 239, 261, 262, 264, 266, 267, 268, 343, 346, 347, 348, 349, 354, 355, 356, 364, 377, 379, 382, 383, 397, 437, 448, 450, 455, 463, 467, 468, 475, 478, 485, 487, 488, 491, 494, 497, 498, 508, 512, 518, 527, 529, 530, 531, 537, 539, 541, 544, 550, 557, 560, 573, 576, 580, 583, 584, 588, 596, 599, 602]
+        self.VALENCE_MASK = [0, 12, 27, 29, 31, 32, 33, 35, 37, 39, 42, 45, 46, 48, 49, 53, 55, 75, 79, 86, 87, 89, 91, 93, 95, 104, 106, 118, 127, 131, 133, 134, 135, 138, 140, 148, 152, 155, 156, 157, 158, 159, 161, 166, 175, 180, 182, 183, 185, 186, 187, 188, 189, 195, 197, 200, 206, 207, 216, 218, 221, 227, 231, 242, 245, 248, 249, 254, 255, 256, 257, 264, 269, 270, 334, 335, 337, 339, 341, 353, 354, 357, 360, 364, 366, 367, 369, 370, 371, 373, 374, 376, 377, 378, 380, 381, 382, 384, 388, 389, 391, 393, 395, 398, 411, 412, 415, 416, 417, 418, 419, 423, 424, 425, 431, 432, 433, 435, 436, 437, 438, 443, 445, 449, 451, 458, 461, 462, 466, 468, 469, 470, 471, 473, 474, 475, 481, 482, 483, 485, 488, 489, 496, 497, 498, 499, 501, 505, 517, 518, 521, 523, 526, 528, 529, 530, 534, 535, 537, 539, 540, 542, 543, 544, 545, 549, 550, 553, 554, 555, 560, 563, 568, 573, 575, 577, 580, 583, 584, 585, 586, 599, 602]
 
-        f = open('studyData_.csv','rb')
-
-        self.header = f.readline().split(',')
-        training_data = np.loadtxt(f,delimiter=",",skiprows=1)
-
-        self.Y = training_data[:,0:2] # first two colums are valence and arousal    
-        self.X = training_data[:,2:len(training_data)] # remaining columns are audio features
-
-        assert len(self.Y) == len(self.X)
-
-        self.valence_model = linear_model.LinearRegression() # Ridge (alpha = .5)
-        self.valence_model.fit(self.X,self.Y[:,0])
-
-        self.arousal_model = linear_model.LinearRegression()
-        self.arousal_model.fit(self.X,self.Y[:,1])
-
-
+        fa = open('datasets/arousal_data.csv','r')
+        fv = open('datasets/valence_data.csv','r')
+
+        self.arousal_header = fa.readline().split(',')
+        self.valence_header = fv.readline().split(',')
+
+        arousal_data = np.loadtxt(fa,delimiter=",")
+        valence_data = np.loadtxt(fv,delimiter=",")
+
+        self.arousal_y = arousal_data[:,-1:]
+        self.arousal_X = arousal_data[:,0:-1]
+        self.valence_y = valence_data[:,-1:]
+        self.valence_X = valence_data[:,0:-1]
+
+        # verify correct training data length
+        assert len(self.arousal_X) == len(self.arousal_y)
+        assert len(self.valence_X) == len(self.valence_y)
+
+        # apply mask to get select features only
+        self.arousal_X = [x[self.AROUSAL_MASK] for x in self.arousal_X]
+        self.valence_X = [x[self.VALENCE_MASK] for x in self.valence_X]
+
+        # create arousal model
+        self.arousal_model = RandomForestRegressor(max_depth=20, min_samples_split=5, oob_score=True)
+        self.arousal_model.fit(self.arousal_X, self.arousal_y.ravel())
+
+        # create valence model
+        self.valence_model = RandomForestRegressor(max_depth=30, min_samples_leaf=2, min_samples_split=5, oob_score=True)
+        self.valence_model.fit(self.valence_X, self.valence_y.ravel())
+
     def predict_valence(self, Z):
-
-        return self.valence_model.predict(Z)
-
-    def predict_arousal(self, Z):
-
-        return self.arousal_model.predict(Z)
-
-    def model_stats(self):
+        return self.valence_model.predict([Z]).item(0)
 
-        print("Valence RSS: %.2f"
-            % np.mean((self.valence_model.predict(self.X) - self.Y[:,0]) ** 2))
-        print("Arousal RSS: %.2f"
-            % np.mean((self.arousal_model.predict(self.X) - self.Y[:,1]) ** 2))
-        print('Valence variance score: %.2f' % self.valence_model.score(self.X, self.Y[:,0]))
-        print('Arousal variance score: %.2f' % self.arousal_model.score(self.X, self.Y[:,1]))
-
-    def visualize_model(self, x, y, m, c):
-        '''
-            TODO: needs finxin
-        '''
-        w = 2
-        h = 2#math.floor(len(y)/2)
-        f, axarr = plt.subplots(w,h)
-        count = 0
-        for  i in range(w):
-            for j in range(h):
-                 axarr[i,j].plot(x[:,count], y, 'o', markersize=3) # label='Original data'
-                 axarr[i,j].plot(x[:,count], m[count]*x[:,count]+c, 'r') # label='Fitted line'
-                 axarr[i,j].set_title(self.header[2+count]) 
-                 #axarr[i,j].set_yscale('exp')
-                 count += 1  
+    def predict_arousal(self, Z):
+        return self.arousal_model.predict([Z]).item(0)
 
-        plt.legend()
-        plt.show()
+    def model_stats(self):
+        print('arousal r-squared score: %.2f' % self.arousal_model.oob_score_)
+        print('valence r-squared score: %.2f' % self.valence_model.oob_score_)