diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/PyVino.iml b/.idea/PyVino.iml
new file mode 100644
index 0000000..d0876a7
--- /dev/null
+++ b/.idea/PyVino.iml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/.idea/dataSources.xml b/.idea/dataSources.xml
new file mode 100644
index 0000000..5b2b08f
--- /dev/null
+++ b/.idea/dataSources.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="DataSourceManagerImpl" format="xml" multifile-model="true">
+    <data-source source="LOCAL" name="raman" uuid="6d36d5a1-9abb-470b-9c9c-7ea1f0d23aa1">
+      <driver-ref>sqlite.xerial</driver-ref>
+      <synchronize>true</synchronize>
+      <jdbc-driver>org.sqlite.JDBC</jdbc-driver>
+      <jdbc-url>jdbc:sqlite:$PROJECT_DIR$/raman.db</jdbc-url>
+      <working-dir>$ProjectFileDir$</working-dir>
+      <libraries>
+        <library>
+          <url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.34.0/sqlite-jdbc-3.34.0.jar</url>
+        </library>
+      </libraries>
+    </data-source>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..1b700c1
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,8 @@
+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyBroadExceptionInspection" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
+    <inspection_tool class="PyPep8NamingInspection" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
+    <inspection_tool class="SqlResolveInspection" enabled="false" level="ERROR" enabled_by_default="false" />
+  </profile>
+</component>
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..d1e22ec
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..d2c2de9
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/PyVino.iml" filepath="$PROJECT_DIR$/.idea/PyVino.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/ramandb.py b/ramandb.py
index 52ed268..3725b18 100644
--- a/ramandb.py
+++ b/ramandb.py
@@ -1,32 +1,85 @@
+import dcclab
 from dcclab.database import *
 import numpy as np
 import requests
+from BaselineRemoval import BaselineRemoval
+import re
 
-class RamanDB(Database):
-    url = 'https://www.dropbox.com/s/peowchyj7xyib4w/raman.db?dl=1'
-    def __init__(self, writePermission=False):  
+class RamanDB(dcclab.database.Database):
+    def __init__(self, databaseURL = None):
         """
-        Creates the database object for Raman spectra.
+        The Database is a MySQL database on cafeine called `raman`.
         """
-
-        self.databasePath = "raman.db"
-        if not os.path.exists(self.databasePath):
-            print("The raman.db file is not available. Atttempting to download from {0}".format(self.url))
-            filename = self.downloadDatabase()
-            if os.path.exists(filename) and not os.path.exists(self.databasePath):
-                os.rename(filename, self.databasePath)
-                print("Success. File has been renamed raman.db")                
+        if databaseURL is None:
+            databaseURL = "mysql://dcclab@cafeine2.crulrg.ulaval.ca/dcclab@raman"
 
         self._wavelengths = None
+        self._wavelengthMask = None
         self.progressStart = None
-        super().__init__(self.databasePath, writePermission=writePermission)
+        self.constraints = []
+        self.pumpWavelengthInNm = 785
+        super().__init__(databaseURL)
+
+        if dcclab.__version__ < "1.0.3":
+            print("You should update PyDCCLab with `pip install dcclab` to get the latest version.")
+
+    def showHelp(self):
+        print("""
+        All wines obtained from the group are in this database. Things to know:
+        * Wines are identified with a "wineId" that is A,B,C, .... AA, AB, AC, .... etc.
+        * Each wine has a number a spectrum acquisitions associated with it (typically 30, 60, etc...)
+        * When a Raman spectrum is acquired
+        
+        """)
+
+    def execute(self, statement, bindings=None):
+        """
+        This function with "bindings" is necessary to handle binary data: it cannot be inserted with a string statement.
+        The bindings are explained here: https://zetcode.com/db/sqlitepythontutorial/ and are similar to .format()
+        but are handled properly by the sqlite3 module instead of a python string. Without it, binary data
+        is inserted as a string, which is not good.
+
+        See insertFileContentIntoSources() for an example.
+
+        """
+        if bindings is None:
+            super().execute(statement) # Call the original function from dcclab.database
+        else:
+            self.cursor.execute(statement, bindings)
+
+    def executeCount(self, statement, bindings=None):
+        """
+        Execute a statement expected to yield a single value (e.g. "select count(*) as count ...")
+        and return that value converted to int.
+
+        `bindings` is passed unchanged to execute().
+        Only the first fetched record is examined.
+
+        Returns None when that record does not have exactly one column.
+        """
+        self.execute(statement, bindings)
+        singleRecord = self.fetchOne()
+        keys = list(singleRecord.keys())
+        if len(keys) == 1:
+            return int(singleRecord[keys[0]])
+        else:
+            return None
 
-    def downloadDatabase(self):
-        r = requests.get(self.url, allow_redirects=True)
-        filename = "raman-download.db"
-        with open(filename, 'wb') as file:
-            file.write(r.content)
-        return filename
+    def parseURL(self, url):
+        """
+        Split a database URL into (protocol, sshuser, host, mysqluser, database).
+        Expected form: mysql://sshusername:sshpassword@cafeine2.crulrg.ulaval.ca/mysqlusername:mysqlpassword@questions
+        A URL that does not match is returned as an sqlite3 database on 127.0.0.1, with the URL as the database field.
+        """
+        # Compare versions numerically: as plain strings, "1.0.10" would sort before "1.0.4".
+        if tuple(int(n) for n in re.findall(r"\d+", dcclab.__version__)[:3]) >= (1, 0, 4):
+            print("No need to patch parseURL in this dcclab version")
+
+        match = re.search("(mysql)://(.*?)@?([^@]+?)/(.*?)@(.+)", url)
+        if match is None:
+            return (Engine.sqlite3, None, "127.0.0.1", None, url)
+        # groups after the protocol: 2 = ssh user, 3 = host, 4 = mysql user, 5 = database
+        return (Engine.mysql,) + match.groups()[1:]
 
     @property
     def wavelengths(self):
@@ -35,8 +88,96 @@ def wavelengths(self):
 
         return self._wavelengths
 
+    @property
+    def wavenumbers(self):
+        return 1e7*(1.0/self.pumpWavelengthInNm - 1.0/self.wavelengths)
+
+    @property
+    def wavelengthMask(self):
+        if self._wavelengthMask is None:
+            self._wavelengthMask = self.getWavelengthMask()
+
+        return self._wavelengthMask
+
+    def getWavelengthMask(self):
+        """Return the boolean mask stored with dataType='mask-wine' (one entry per fetched row).
+        If none is stored, build a default mask (indices 200 to 999 True), store it, and return it."""
+        self.execute(r"select distinct(wavelength), intensity from spectra where dataType='mask-wine' order by wavelength")
+        rows = self.fetchAll()
+
+        if len(rows) != 0:
+            mask = np.array([bool(row['intensity']) for row in rows], dtype=bool)
+        else:
+            # dtype=bool matters: np.zeros defaults to float, and a float array cannot be used as an index mask
+            mask = np.zeros(shape=(len(self.wavelengths)), dtype=bool)
+            for i in range(200, 1000):
+                mask[i] = True
+            self.insertSpectralData(wavelengths=self.wavelengths, intensities=mask, dataType='mask-wine', wineId=None, sampleId=None, algorithm='BaselineRemoval')
+
+        return mask
+
+    def readQEProFile(self, filePath):
+        # text_file = open(filePath, "br")
+        # hash = hashlib.md5(text_file.read()).hexdigest()
+        # text_file.close()
+
+        with open(filePath, "r") as text_file:
+            lines = text_file.read().splitlines()
+
+            wavelengths = []
+            intensities = []
+            for line in lines:
+                match = re.match(r'^\s*(\d+\.?\d+)\s+(-?\d*\.?\d*)', line)
+                if match is not None:
+                    intensity = match.group(2)
+                    wavelength = match.group(1)
+                    wavelengths.append(wavelength)
+                    intensities.append(intensity)
+                else:
+                    pass
+                    # print("Line does not match: {0}".format(line))
+        return wavelengths, intensities
+
+    def insertSpectralDataFromFiles(self, filePaths, dataType='raw'):
+        inserted = 0
+        for filePath in filePaths:
+            match = re.search(r'([A-Z]{1,2})_?(\d{1,3})\.', filePath)
+            if match is None:
+                raise ValueError("The file does not appear to have a valid name: {0}".format(filePath))
+
+            wineId = int(ord(match.group(1))-ord('A'))
+            sampleId = int(match.group(2))
+            spectrumId = "{0:04}-{1:04d}".format(wineId, sampleId)
+
+            wavelengths, intensities = self.readQEProFile(filePath)
+            try:
+                self.insertSpectralData(wavelengths, intensities, dataType, wineId, sampleId)
+                print("Inserted {0}".format(filePath))
+                inserted += 1
+            except ValueError as err:
+                print(err)
+
+        return inserted
+
+    def insertSpectralData(self, wavelengths, intensities, dataType, wineId, sampleId, algorithm=None):
+        if wineId is None or sampleId is None:
+            spectrumId = None
+        else:
+            spectrumId = "{0:04}-{1:04d}".format(wineId, sampleId)
+
+        count = self.executeCount('select count(*) as count from spectra where spectrumId = "{0}" and dataType = "{1}"'.format(spectrumId, dataType))
+        if count != 0 :
+            raise ValueError("Spectrum {0} already exists with dataType='{1}'".format(spectrumId, dataType))
+
+        values = []
+        for x,y in zip(wavelengths, intensities):
+            values.append("({0}, {1}, '{2}', '{3}', '{4}', '{5}', now(), '{6}') ".format(x,float(y), dataType, wineId, sampleId, spectrumId, algorithm))
+
+        bigStatement = "insert into spectra (wavelength, intensity, dataType, wineId, sampleId, spectrumId, dateAdded, algorithm) values" + ','.join(values)
+        self.execute( bigStatement)
+
     def getWavelengths(self):
-        self.execute(r"select distinct(wavelength) from spectra order by wavelength")
+        self.execute(r"select distinct(wavelength) from spectra where dataType='raw' order by wavelength")
         rows = self.fetchAll()
         nTotal = len(rows)
 
@@ -46,15 +187,43 @@ def getWavelengths(self):
 
         return wavelengths
 
+    def getDataTypes(self):
+        self.execute('select distinct dataType from spectra')
+        rows = self.fetchAll()
+        dataTypes = []
+        for row in rows:
+            dataTypes.append(row["dataType"])
+
+        return dataTypes
 
-    def getCountFiles(self):
+    def getWineIds(self):
+        self.execute(r"select count(*) as count, wineId as id from files group by wineId order by wineId;")
+        rows = self.fetchAll()
+        identifiers = {}
+        for row in rows:
+            id = row["id"]
+            nSamples = row["count"]
+            identifiers[id] = nSamples
+        return identifiers
+
+    def getWinesSummary(self):
+        # mysql.connector.errors.ProgrammingError: 1055(
+        #     42000): Expression  # 4 of SELECT list is not in GROUP BY clause and contains nonaggregated column 'raman.wines.dateOpened' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by
+
+        self.execute(r"select files.wineId,  count(*) as nSamples, wines.* from files inner join wines on wines.wineId = files.wineId group by files.wineId order by files.wineId")
+        rows = self.fetchAll()
+        wines = []
+        for row in rows:
+            wines.append(dict(row))
+        return wines
+
+    def getFileCount(self):
         self.execute(r"select count(*) as count from files")
         rows = self.fetchAll()
         if rows is None:
             return 0
         return rows[0]["count"]
 
-
     def getSpectraPaths(self):
         self.execute("select path from files order by path")
         rows = self.fetchAll()
@@ -63,37 +232,118 @@ def getSpectraPaths(self):
             paths.append(row['path'])
         return paths
 
-    def getIntensities(self, limit=None):
+    def getSpectrum(self, dataType, spectrumId):
+        whereConstraints = []
+        possibleDataTypes = self.getDataTypes()
+
+        if dataType is None:
+            dataType = 'raw'
+        if dataType not in possibleDataTypes:
+            raise ValueError('Possible dataTypes are {0}'.format(possibleDataTypes))
+        whereConstraints.append("dataType = '{0}'".format(dataType))
+
+        whereConstraints.append("spectrumId = '{0}'".format(spectrumId))
+
+        if len(whereConstraints) != 0:
+            whereClause = "where " + " and ".join(whereConstraints)
+        else:
+            whereClause = ""
+
         stmnt = """
-        select wavelength, intensity, files.path from spectra 
-        inner join files on files.fid = spectra.fid
-        order by files.path, wavelength """
+        select wavelength, intensity, spectra.spectrumId from spectra
+        {0} 
+        order by spectra.spectrumId, spectra.wavelength """.format(whereClause )
 
         wavelengths = self.getWavelengths()
         nWavelengths = len(wavelengths)
 
+        self.execute(stmnt)
+
+        rows = []
+        row = self.fetchOne()
+        while row is not None:
+            rows.append(row)
+            if len(rows) % 100 == 0:
+                print(".", end='')
+            row = self.fetchOne()
+
+        nSamples = len(rows)//nWavelengths
+        if nSamples == 0:
+            return None, None
+
+        spectra = np.zeros(shape=(nWavelengths, nSamples))
+        spectrumIdentifiers = [""]*nSamples
+        for i,row in enumerate(rows):
+            spectra[i%nWavelengths, i//nWavelengths] = float(row['intensity'])
+            spectrumIdentifiers[i//nWavelengths] = row['spectrumId']
+
+        return spectra, spectrumIdentifiers
+
+    def getSpectraWithId(self, dataType=None, color=None, limit=None):
+        whereConstraints = []
+        possibleDataTypes = self.getDataTypes()
+
+        if dataType is None:
+            dataType = 'raw'
+        if dataType not in possibleDataTypes:
+            raise ValueError('Possible dataTypes are {0}'.format(possibleDataTypes))
+        whereConstraints.append("dataType = '{0}'".format(dataType))
+
+        if color is not None:
+            whereConstraints.append(' wineId in (select wineId from wines where color="{0}") '.format(color))
+
+        if len(whereConstraints) != 0:
+            whereClause = "where " + " and ".join(whereConstraints)
+        else:
+            whereClause = ""
+
+        stmnt = """
+        select wavelength, intensity, spectra.spectrumId 
+            from spectra
+            {0}
+        order by spectra.spectrumId, spectra.wavelength """.format(whereClause )
+
+        wavelengths = self.wavelengths
+        nWavelengths = len(wavelengths)
+
         if limit is not None:
             stmnt += " limit {0}".format(limit*nWavelengths)
 
         self.execute(stmnt)
-        rows = list(self.fetchAll())
 
-        if rows is None:
-            return None
-            
+        rows = []
+        row = self.fetchOne()
+        while row is not None:
+            rows.append(row)
+            if len(rows) % 100 == 0:
+                print(".", end='')
+            row = self.fetchOne()
+
         nSamples = len(rows)//nWavelengths
         if nSamples == 0:
             return None
 
         spectra = np.zeros(shape=(nWavelengths, nSamples))
-        wineIdentifiers = [""]*nSamples
+        spectrumIdentifiers = [""]*nSamples
         for i,row in enumerate(rows):
             spectra[i%nWavelengths, i//nWavelengths] = float(row['intensity'])
-            match = re.search(r"([A-Z]+)_?\d+.txt", row["path"])
-            if match is not None:
-                wineIdentifiers[i//nWavelengths] = match.group(1)
+            spectrumIdentifiers[i//nWavelengths] = row['spectrumId']
+
+        return spectra, spectrumIdentifiers
+
+    def subtractFluorescence(self, rawSpectra, polynomialDegree=5):
+
+        """
+        Remove fluorescence background from the data.
+        :return: A corrected data without the background.
+        """
+
+        correctedSpectra = np.empty_like(rawSpectra)
+        for i in range(rawSpectra.shape[1]):
+            spectrum = rawSpectra[:, i]
+            correctedSpectra[:, i] = BaselineRemoval(spectrum).IModPoly(polynomialDegree)
 
-        return spectra, wineIdentifiers
+        return correctedSpectra
 
     def showProgressBar(self, iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█', printEnd = "\r"):
         """
diff --git a/testDatabase.py b/testDatabase.py
index 2d2bdca..9678632 100644
--- a/testDatabase.py
+++ b/testDatabase.py
@@ -4,67 +4,167 @@
 import os
 from ramandb import RamanDB
 import requests
+import re
 
-class TestBuildDatabase(unittest.TestCase):
-    def testDatabase(self):
-        db = RamanDB()
-        self.assertIsNotNone(db)
-        self.assertTrue(os.path.exists(db.databasePath))
+class TestRamanDatabase(unittest.TestCase):
+    def setUp(self):
+        self.db = RamanDB()
+        # self.db = RamanDB("mysql://127.0.0.1/root@raman")
+        self.assertIsNotNone(self.db)
 
-    def testWavelengths(self):
-        db = RamanDB()
-        self.assertIsNotNone(db.getWavelengths())
-        self.assertEqual(len(db.getWavelengths()), 1044)
-
-    def testWavelengthsProperty(self):
-        db = RamanDB()
-        self.assertIsNotNone(db.wavelengths)
-        self.assertEqual(len(db.wavelengths), 1044)
-
-    def testFileCount(self):
-        db = RamanDB()
-        self.assertIsNotNone(db.getCountFiles())
-        self.assertEqual(db.getCountFiles(), 709)
-
-    def testFilePaths(self):
-        db = RamanDB()
-        self.assertIsNotNone(db.getSpectraPaths())
-        self.assertEqual(db.getCountFiles(), len(db.getSpectraPaths()))
-
-    def testGetIntensity(self):
-        db = RamanDB()
-        matrix, labels = db.getIntensities()
+    @unittest.skip("Now in setUp")
+    def test01Database(self):
+        self.db = RamanDB()
+        self.assertIsNotNone(self.db)
+
+    def test02Wavelengths(self):
+        self.assertIsNotNone(self.db.getWavelengths())
+
+    def test03WavelengthsAreUniqueAndCommon(self):
+        """
+        Check that every wavelength occurs the same number of times among the RAW spectra.
+        This is a complex SQL statement with a sub-select, but it returns 1 if true and 0 if false.
+        """
+        self.db.execute("""
+        SELECT 
+        MAX(spectralPts) = MIN(spectralPts) as wavelengthsAreAllTheSame
+        FROM
+            (SELECT 
+                COUNT(wavelength) AS spectralPts
+            FROM
+                spectra
+            where dataType='raw'
+            GROUP BY wavelength) AS something;
+        """)
+        firstRecord = self.db.fetchOne()
+        self.assertEqual(firstRecord["wavelengthsAreAllTheSame"], 1)
+
+    def test04WavelengthsProperty(self):
+        self.assertIsNotNone(self.db.wavelengths)
+
+    def test05FileCount(self):
+        self.assertIsNotNone(self.db.getFileCount())
+
+    def test06FileCountShouldMatchRawSpectraTimesWavelength(self):
+        """
+        Number of points in the spectra database for 'raw' spectra should be #wavelengths x #files
+        """
+        rawSpectraCount = self.db.getFileCount()
+        wavelengthsCount = len(self.db.getWavelengths())
+
+        self.db.execute("select count(*) as count from spectra where dataType='raw'")
+        valueRecord = self.db.fetchOne()
+        self.assertEqual(valueRecord["count"], rawSpectraCount*wavelengthsCount)
+
+    def test07FilePaths(self):
+        self.assertIsNotNone(self.db.getSpectraPaths())
+        self.assertEqual(self.db.getFileCount(), len(self.db.getSpectraPaths()))
+
+    def test08GetWhiteSpectra(self):
+        self.db.execute("select count(*) as count from files inner join wines on wines.wineId = files.wineId where wines.color = 'white'")
+        firstRecord = self.db.fetchOne()
+        whiteWineFileCount = firstRecord["count"]
+
+        matrix, labels = self.db.getSpectraWithId(dataType='raw', color='white')
+        self.assertIsNotNone(matrix)
+
+        self.assertEqual(matrix.shape, (len(self.db.wavelengths), whiteWineFileCount))
+
+    def test09GetRedSpectra(self):
+        self.db.execute("select count(*) as count from files inner join wines on wines.wineId = files.wineId where wines.color = 'red'")
+        firstRecord = self.db.fetchOne()
+        redWineFileCount = firstRecord["count"]
+
+        matrix, labels = self.db.getSpectraWithId(dataType='raw', color='red')
         self.assertIsNotNone(matrix)
-        self.assertEqual(matrix.shape, (len(db.wavelengths), db.getCountFiles()))
-
-    @unittest.skip("Ok, tested")
-    def testDownload(self):
-        url = 'https://www.dropbox.com/s/2st0sv7jpii6dz8/raman.db?dl=1'
-        r = requests.get(url, allow_redirects=True)
-        with open('test.db', 'wb') as file:
-            file.write(r.content)
-
-    @unittest.skip("Ok, tested")
-    def testDownload(self):
-        db = RamanDB()
-        filename = db.downloadDatabase()
-        self.assertTrue(os.path.exists(filename))
-        os.remove(filename)
-
-    @unittest.skip("Done, no need to redo.")
-    def testAddFileIdToDatabase(self):
-        db = RamanDB(writePermission=True)
-        db.execute("select * from files order by path")
-        records = db.fetchAll()
-        for i, record in enumerate(records):
-            db.execute("update files set fid={0} where md5='{1}'".format(i, record["md5"]))
-
-        db.execute("select spectra.md5, files.fid from spectra inner join files on files.md5 = spectra.md5")
-        records = db.fetchAll()
-        for i, record in enumerate(records):
-            statement = "update spectra set fid={0} where md5='{1}'".format(record["fid"], record["md5"])
-            db.execute(statement)
 
+        self.assertEqual(matrix.shape, (len(self.db.wavelengths), redWineFileCount))
+
+    def test10ReadQEProFile(self):
+        wavelengths, intensities = self.db.readQEProFile('originaldata/Q100.txt')
+        self.assertEqual(len(intensities), 1044)
+
+    def test11InsertAllSpectra(self):
+        dataDir = 'originaldata'
+        filenames = os.listdir(dataDir)
+        filePaths = []
+        for filename in filenames:
+            filePaths.append(os.path.join(dataDir, filename))
+
+        inserted = self.db.insertSpectralDataFromFiles(filePaths)
+        if inserted == 0:
+            self.skipTest("Nothing was inserted")
+
+    def test12ExecuteCount(self):
+        self.assertTrue(self.db.executeCount("select count(*) as count from spectra") > 0)
+
+    def test13InsertAllCorrectedSpectra(self):
+        self.db.execute("select distinct spectrumId from spectra where spectrumId not in (select spectrumId from spectra where dataType='fluorescence-corrected')")
+        records = self.db.fetchAll()
+        if len(records) == 0:
+            self.skipTest("All corrected spectra exist in the database")
+
+        for record in records:
+            spectrumId = record["spectrumId"]
+            spectrum, labels = self.db.getSpectrum(dataType='raw', spectrumId=spectrumId)
+            if spectrum is None:
+                continue
+            degree = 100
+            correctedSpectrum = self.db.subtractFluorescence(spectrum, polynomialDegree=degree)
+            print(spectrumId)
+            match = re.search(r"(\d+)-(\d+)", spectrumId)
+            wineId = int(match.group(1))
+            sampleId = int(match.group(2))
+            self.db.insertSpectralData(self.db.wavelengths, correctedSpectrum[:,:], 'fluorescence-corrected', wineId, sampleId, 'BaselineRemoval-nomask-degree{0}'.format(degree))
+
+    @unittest.skip("done")
+    def test14BuildWineIdAndSampleId(self):
+        self.db.execute('update files set sampleId=substr(path,18,2) where path like "%\_%" ESCAPE "\"')
+
+    def test15WinesSummary(self):
+        wineSummary = self.db.getWinesSummary()
+        totalNumberOfSpectra = sum([ wine["nSamples"] for wine in wineSummary])
+
+        self.db.execute("select count(*) as count from spectra where dataType='raw'")
+        valueRecord = self.db.fetchOne()
+        self.assertEqual(valueRecord["count"], totalNumberOfSpectra*len(self.db.getWavelengths()))
+
+    def test16SingleSpectrum(self):
+        self.db.execute("select wavelength, intensity from spectra where spectrumId = '0002-0001'")
+        records = self.db.fetchAll()
+        for record in records:
+            print(record)
+
+    def test17DataTypes(self):
+        self.assertTrue('raw' in self.db.getDataTypes())
+
+    def test18GetSpectraValidTypeFluorescence(self):
+        if 'fluorescence-corrected' in self.db.getDataTypes():
+            spectra, spectrumIds = self.db.getSpectraWithId(dataType='fluorescence-corrected')
+            self.assertIsNotNone(spectra)
+        else:
+            self.skipTest("No background-corrected spectra in database")
+
+    def test19GetSpectraInvalidType(self):
+        with self.assertRaises(ValueError):
+            spectra = self.db.getSpectraWithId(dataType='unknown')
+
+    @unittest.skip("Only on dccote's computer")
+    def test20DatabaseMySQLLocal(self):
+        db = RamanDB("mysql://127.0.0.1/root@raman")
+        self.assertIsNotNone(db)
+        self.assertIsNotNone(db.getWavelengths())
+
+    def test21Wavenumbers(self):
+        self.assertIsNotNone(self.db.wavenumbers)
+
+    def test22Mask(self):
+        print(sum(self.db.wavelengthMask))
+        maskRange = []
+        for i, mask in enumerate(self.db.wavelengthMask):
+            if mask:
+                maskRange.append(i)
+        print(self.db.wavelengths[maskRange])
 
 if __name__ == "__main__":
     unittest.main()
\ No newline at end of file
diff --git a/testVino.py b/testVino.py
new file mode 100644
index 0000000..6d54842
--- /dev/null
+++ b/testVino.py
@@ -0,0 +1,81 @@
+import unittest
+import numpy as np
+from dcclab import Database
+import os
+from ramandb import RamanDB
+import requests
+import matplotlib.pyplot as plt
+from vino import vinoPCA
+
+class TestVInoClass(unittest.TestCase):
+    @unittest.skip("NOt now")
+    def testInit(self):
+        iterable = [31, 30, 30, 30, 80, 31, 33, 31, 30, 30, 30, 30, 30, 30, 30, 30, 104, 30, 30] # sans vin blanc parceque ça shit le aspect ratio
+        total = sum(iterable)
+
+        # Data = np.genfromtxt('/Users/Shooshoo/PycharmProjects/PCA_DCCLab/DataVino_Sorted.csv', delimiter=',')
+        db = RamanDB()
+        data, labels = db.getIntensities()
+        wavelengths = db.getWavelengths()
+        data = np.cat(wavelengths, wavelengths, data[:,0:total])
+        self.assertEqual(data.shape[1], total)
+        my_Spectrums = vinoPCA(data, iterable)
+
+        self.assertIsNotNone(my_Spectrums)
+
+    def testRemoveFluo(self):
+        iterable = [31, 30, 30, 30, 80, 31, 33, 31, 30, 30, 30, 30, 30, 30, 30, 30, 104, 30, 30] # sans vin blanc parceque ça shit le aspect ratio
+        total = sum(iterable)
+
+        # I need to remove this function, I don't have access to the csv file.
+        # Data = np.genfromtxt('/Users/Shooshoo/PycharmProjects/PCA_DCCLab/DataVino_Sorted.csv', delimiter=',')
+        # After a bit of playing around: column 0 is not used, column 1 is the wavelengths, then its
+        # the data
+        my_Spectrums = vinoPCA()
+
+        self.assertIsNotNone(my_Spectrums)
+
+        my_Spectrums.subtractFluorescence()
+
+
+    def testDoPCA(self):
+        my_Spectrums = vinoPCA()
+
+        self.assertIsNotNone(my_Spectrums)
+
+        my_Spectrums.doPCA(10)
+        my_Spectrums.showTransformedData3D()
+        my_Spectrums.showTransformedData2D()
+        my_Spectrums.showEigenvectors()
+
+    def testvinoPCANoArgument(self):
+        my_Spectrums = vinoPCA()
+        self.assertIsNotNone(my_Spectrums)
+
+        my_Spectrums.doPCA(3)
+        my_Spectrums.showTransformedData3D()
+        my_Spectrums.showTransformedData2D()
+        my_Spectrums.showEigenvectors()
+
+    # def testInitDB(self):
+    #     self.assertIsNotNone(vinoPCA().db)
+
+    def testColormap(self):
+        vino = vinoPCA()
+        cm = vino.getColorMap()
+        self.assertIsNotNone(cm)
+        spectra, labels = vino.db.getSpectraWithId()
+        self.assertEqual(len(cm), len(labels))
+
+    # def testOneSpectrum(self):
+    #     vino = vinoPCA()
+    #     spectra, labels = vino.db.getIntensities()
+    #     plt.plot(spectra[:,1])
+    #     newSpectra = vino.removeFLuo(spectra)
+    #     print(newSpectra)
+    #     # plt.plot(newSpectra)
+    #     # plt.show()
+
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
diff --git a/vino.py b/vino.py
index 2f40e0f..01db57c 100644
--- a/vino.py
+++ b/vino.py
@@ -4,19 +4,23 @@
 from sklearn.decomposition import PCA
 from scipy import interpolate
 from BaselineRemoval import BaselineRemoval
-
+from ramandb import RamanDB
 
 class vinoPCA:
 
-    def __init__(self, Data, numberOfEachSamples):
+    def __init__(self):
+        self.db = RamanDB()
+        self.constraints = []
+        self.data, self.labels = self.db.getSpectraWithId(dataType='raw')
+        self.correctedData, correctedLabel = self.db.getSpectraWithId(dataType='fluorescence-corrected')
+        if self.labels != correctedLabel:
+            raise ValueError('Not all spectra are corrected')
 
-        """
-        :param Data: The data on wich PCA should be done.
-        :param colormap: An iterable that contains how many of each samples there is in Data, in the good order.
-        """
+        self.wavelengths = self.db.getWavelengths()
 
-        self.Data = Data
-        self.numberOfEachSamples = numberOfEachSamples
+        self.wavelengthMask = self.db.wavelengthMask
+        self.data = self.data[self.wavelengthMask, :]
+        self.wavelengths = self.wavelengths[self.wavelengthMask]
 
     def getColorMap(self):
 
@@ -25,50 +29,32 @@ def getColorMap(self):
         :return: Return a colormap to visualise different samples on the plot.
         """
 
-        for i in range(0, len(self.numberOfEachSamples)):
-            if i == 0:
-                colormap = np.zeros(self.numberOfEachSamples[0])
-            else:
-                colormap = np.append(colormap, np.ones(self.numberOfEachSamples[i]) *5*i)
+        # Distinct labels, sorted so that the label-to-rank mapping is deterministic.
+        uniqueLabels = sorted(set(self.labels))
+
+        # Colour values are spaced 5 apart: rank 0 -> 0, rank 1 -> 5, rank 2 -> 10, ...
+        colors = {identifier: 5 * rank for rank, identifier in enumerate(uniqueLabels)}
+
+        # Expand to one colour value per entry of self.labels, in the same order;
+        # the plotting methods pass the result as the `c` argument of scatter().
+        colormap = [colors[identifier] for identifier in self.labels]
 
-        return colormap
+        return np.array(colormap)
 
-    def removeFLuo(self, Data):
+    def subtractFluorescence(self):
 
         """
-        Remove fluorescence background from the data given.
-        :param Data: The Data from witch you wish to remove fluo background.
-        :return: A new set of Data without the background.
+        Remove fluorescence background from the data.
+        :return: The corrected spectra, with the fluorescence background removed.
         """
 
-        nm = Data[:, 1]
-        cm = 1 / (632.8e-9) - 1 / (nm * 1e-9)
-        size = np.ma.size(Data, 1)
-        polynomial_degree = 100
-        filtered_datas = np.zeros(shape=(800, size - 1))
+        polynomial_degree = 5
+        # Float buffer: plain empty_like would inherit an integer dtype from self.data and truncate the corrected values.
+        correctedSpectra = np.empty_like(self.data, dtype=float)
+        for i in range(self.data.shape[1]):
+            correctedSpectra[:, i] = BaselineRemoval(self.data[:, i]).IModPoly(polynomial_degree)
 
-        # for column in range(2, size):
-        #     y = Data[:, column]
-        #     d = 25
-        #     f2 = interpolate.interp1d(cm[199:][::d], y[199:][::d], kind='quadratic')
-        #     y = y[200:1000] - f2(cm[200:1000])
-        #     y = (y - min(y)) / max(y - min(y))
-        #     filt_datas[:, column - 1] = y
-        # filt_datas[:, 0] = cm[200:1000]
-
-        for column in range(2, size):
-            spectre = Data[200:1000, column]
-            baseObj = BaselineRemoval(spectre)
-            values = baseObj.IModPoly(polynomial_degree)
-            # values = values - min(values) # Si tu normalises, tu perds les composants communs (Alcool particulèrement)
-            # values = values/max(values)   # tu perds aussi le degrés de présence (Plus ou moins bouchonné ?)
-                                            # Si tu normalises pas, tu favorises les composants communs présents à
-                                            # différents degrés (Plus ou moins d'alcool). Donc tester avec et sans?
-            filtered_datas[:, column - 1] = values
-
-        filtered_datas[:, 0] = Data[200:1000, 1]
-
-        return filtered_datas
+        return correctedSpectra
 
     def doPCA(self, n:int):
 
@@ -77,11 +63,9 @@ def doPCA(self, n:int):
         :param n: number of componants to get from the PCA
         :return: Returns nothing. Just creats an array of the transformed datas into the new vector space
         """
-
-        new_Datas = self.removeFLuo(self.Data)
-        new_Datas = np.transpose(new_Datas)
-        self.X_PCA = PCA(n_components=n)
-        self.X_reduced = self.X_PCA.fit_transform(new_Datas[1:, :])
+        # Spectra are stored one per column; scikit-learn expects one sample per row, hence the transpose.
+        self.pca = PCA(n_components=n)
+        self.X_reduced = self.pca.fit_transform(self.subtractFluorescence().T)
 
     def showTransformedData3D(self):
 
@@ -94,9 +78,9 @@ def showTransformedData3D(self):
         fig = plt.figure(1, figsize=(8, 6))
         ax = Axes3D(fig, elev=-150, azim=110)
         ax.scatter(
-            self.X_reduced[:700, 0],
-            self.X_reduced[:700, 1],
-            self.X_reduced[:700, 2],
+            self.X_reduced[:, 0],
+            self.X_reduced[:, 1],
+            self.X_reduced[:, 2],
             c=self.getColorMap(),
             cmap='nipy_spectral',
             s=10)
@@ -118,7 +102,7 @@ def showTransformedData2D(self):
 
         plt.clf()
         plt.figure(2)
-        plt.scatter(self.X_reduced[:700, 0], self.X_reduced[:700, 1], c=self.getColorMap(), cmap='nipy_spectral', s=10)
+        plt.scatter(self.X_reduced[:, 0], self.X_reduced[:, 1], c=self.getColorMap(), cmap='nipy_spectral', s=10)
         plt.title('First two PCA directions')
         plt.xlabel('1st eigenvector')
         plt.ylabel('2nd eigenvector')
@@ -138,7 +122,7 @@ def getAllEigenvectors(self):
         :return: an array of n eigenvector
         """
 
-        return self.X_PCA.components_.transpose()
+        return self.pca.components_.transpose()
 
     def showEigenvectors(self):
 
@@ -148,13 +132,13 @@ def showEigenvectors(self):
         """
         plt.figure(3)
         plt.title('1st eigenvector')
-        plt.plot(self.X_PCA.components_.transpose()[:, 0])
+        plt.plot(self.pca.components_.transpose()[:, 0])
         plt.figure(4)
         plt.title('2nd eigenvector')
-        plt.plot(self.X_PCA.components_.transpose()[:, 1])
+        plt.plot(self.pca.components_.transpose()[:, 1])
         plt.figure(5)
         plt.title('3rd eigenvector')
-        plt.plot(self.X_PCA.components_.transpose()[:, 2])
+        plt.plot(self.pca.components_.transpose()[:, 2])
         plt.show()
 
     def getTransformedDatas(self):
@@ -173,7 +157,7 @@ def getScreeValues(self):
         :return: array of the scree values, from most important to least
         """
 
-        return self.X_PCA.explained_variance_ratio_
+        return self.pca.explained_variance_ratio_
 
     def plotScreeValues(self):
 
diff --git a/wines.txt b/wines.txt
new file mode 100644
index 0000000..3b19938
--- /dev/null
+++ b/wines.txt
@@ -0,0 +1,27 @@
+A	2022/01/12	Wine	Sirius Bordeaux 2018	https://www.saq.com/en/223537	VPN	France		Merlot, Cabernet Sauvignon	2.2	red	13
+B	2022/01/12	Wine	Ménage à Trois 2019	https://www.saq.com/en/10709152	VPN	United States		Cabernet Sauvignon	4.3	red	13.5
+C	2022/01/22	Wine	Woodbridge by Robert Mondavi	https://www.saq.com/en/48611	VPN	United States		Cabernet Sauvignon	7.3	red	13.5
+D	2022/01/28	Wine	Les Jamelles Pinot Noir Pays d'Oc	https://www.saq.com/en/10802904	VPN	France		pinot noir	4	red	13
+E	2022/01/27	Wine	Monasterio de las Vinas	https://www.saq.com/en/854422	VPN	Spain		70% Garnacha, 20% Tempranillo, 10% Carinena	2.1	red	13.5
+F	2022/02/05	Wine	Revolution	https://www.saq.com/en/12166892	EP	United States		Ruby cabernet 50 %, Carignan 32 %, Syrah 18 %	10	red	13.5
+G	2022/02/12	Wine	Milhistoraise	https://www.saq.com/en/13794111	EP	Spain		Grenache	1.7	red	14
+H	2022/02/13	Wine	Wallaroo Trail Shiraz	https://www.saq.com/en/12498459	EP	Australia		Shiraz 85 %, Cabernet sauvignon 10 %, Petit verdot 5 %	11	red	13.5
+I	2022/02/13	Wine	Toro loco	https://futailles.com/en/products/wine/red/toro-loco	EP	Spain		Tempranillo	0	red	12.5
+J	2022/02/13	Wine	Cantini	https://vinstriani.com/produits/cantini-rouge.html	EP	Italy		Sangiovese, Montepulciano, and Cabernet Sauvignon	-	red	12
+K	2022/02/13	Wine	Nicolas laloux	https://www.vinsenepicerie.com/en/nicolas-laloux-1/	EP	Ontario.Canada		Cabernet Sauvignon	-	red	12.5
+L	2022/02/13	Wine	smoky bay SHIRAZ	https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/red-wine-14001/smoky-bay-shiraz-17650#.YguvavXMIUo	EP	Australia		Shiraz	10	red	13
+M	2022/02/13	Wine	Dolce Venti	https://futailles.com/en/products/wine/red/dolce-venti	EP	Italy		Merlot	-	red	11.5
+N	2022/02/13	Wine	Aroma mi Amore	https://vinsarista.com/en/produit/wines/aroma-mi-amore/aroma-mi-amore-red-wine/	EP	Italy		Refosco	-	red	14.5
+O	2022/02/19	Wine	Sonho Aragonez	https://www.vivino.com/CA/en/sonho-aragonez/w/5905886	EP	Portugal		Aragonez		red	12.5
+P	2022/02/27	Wine	Double vie	https://vinsarista.com/en/produit/wines/double-vie/red-wine/	EP	Canada				red	12
+Q	2022/02/28	Wine	Danza	https://www.iga.net/en/product/wineargentinian-red-bonarda/00000_000000082424300222	EP	Argentina		Douce noir		red	13.7
+R	2022/02/23	Wine	bu	https://www.iga.net/en/product/winered-rosso-terre-sicilaine-bio-it/00000_000000005604913702	EP	Italy		Nero d'Avola 70% + Merlot 20% + Syrah 10%		red	12.5
+S	2022/02/24	Wine	Croix d'Or	https://futailles.com/en/products/wine/red/croix-dor	EP	Moldavie		pinot noir		red	12.5
+T	2022/02/18	Wine	AUFKELLEREIEN	https://www.iga.net/fr/produit/vin-blancallemagne---fruite-et-doux-9--alcool---18-ans--/00000_000000005604980687	AR	Allemagne				white	9
+U	2022/02/15	Wine	Macon Lugny les Cray	https://www.saq.com/en/13319061	DC	France	Bourgogne			white	
+V	2022/02/16	Wine	Brumont Cotes de Gascogne	https://www.saq.com/en/548883	DC	France		Sauvignon, Gros Manseng		white	12
+W	2022/02/18	Wine	Piuze	https://www.saq.com/en/14853741	DC	France		Chardonnay		white	12
+X	2022/02/19	Wine	Chateau de Maligny	https://www.saq.com/en/560763	DC	France	Chablis	Chardonnay		white	12.5
+Y	2022/02/21	Wine	L'impromptu	https://www.saq.com/en/13343264	DC	France		Gamay		red	14
+Z	2022/02/22	Wine	Sancerres Aurore Dezat	https://www.saq.com/en/13992897	DC	France	Sancerre	Chardonnay	1.6	white	12.5
+AA	2022/02/26	Wine	Lord de la Ragotiere	https://www.saq.com/en/10690501	DC	France		Chardonnay		white	12
\ No newline at end of file