jonnor · jonnor · Oct 4, 2025 · Aug 3, 2025 · Aug 3, 2025 · Aug 7, 2025
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
@@ -22,6 +22,45 @@ jobs:
     - name: Install firmware dependencies
       run: bash -xe install_unix.sh
     - name: Install Python dependencies
-      run: pip install -r requirements.txt
+      run: pip install .
     - name: Run firmware tests
       run: micropython firmware/test_toothbrush.py
+
+  model:  # Trains and evaluates the model on the checked-in dataset
+    runs-on: ubuntu-24.04
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        submodules: true  # the emlearn-micropython submodule provides examples/har_trees
+    - name: Install OS dependencies
+      run: sudo add-apt-repository universe
+    - uses: actions/setup-python@v5
+      with:
+        python-version: '3.12'
+    - name: Install Python dependencies  # extras spec is quoted so the shell cannot glob-expand it
+      run: pip install '.[dev]'
+
+    - name: Install har_trees example dependencies  # distinct name; was a duplicate of the step above
+      run: pip install emlearn-micropython/examples/har_trees
+    - name: Run model train/evaluation
+      env:
+        PYTHONPATH: emlearn-micropython/examples/har_trees
+      run: python -m software.model.evaluate --dataset combined --data-dir data/jonnor-brushing-1 --config data/jonnor-brushing-1/config.yaml
+
+  dataset:  # Rebuilds the combined dataset from raw sensor data, labels and session list
+    runs-on: ubuntu-24.04
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        submodules: true  # the emlearn-micropython submodule provides examples/har_trees
+    - name: Install OS dependencies
+      run: sudo add-apt-repository universe
+    - uses: actions/setup-python@v5
+      with:
+        python-version: '3.12'
+    - name: Install Python dependencies  # extras spec is quoted so the shell cannot glob-expand it
+      run: pip install '.[dev]'
+    - name: Create combined dataset from sensor-data and labels
+      env:
+        PYTHONPATH: emlearn-micropython/examples/har_trees
+      run: python -m software.dataset.combine --data ./data/jonnor-brushing-1/har_record/ --samplerate 50 --columns acc_x,acc_y,acc_z --out combined2.parquet --labels data/jonnor-brushing-1/labels/project-7-at-2024-12-31-23-50-84589958.csv --sessions data/jonnor-brushing-1/videos.csv
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "emlearn-micropython"]
+	path = emlearn-micropython
+	url = https://github.com/emlearn/emlearn-micropython.git
diff --git a/README.md b/README.md
@@ -113,7 +113,7 @@ Using a virtual environment is recommended.
 Install project dependencies
 
 ```
-pip install -r requirements.txt
+pip install -e .
 ```
 
 #### Flash MicroPython to device
@@ -137,10 +137,6 @@ mpremote mip install https://github.com/emlearn/emlearn-micropython/raw/refs/hea
 Additional dependencies to use har_record.py for recording data
 ```
 mpremote mip install https://github.com/emlearn/emlearn-micropython/raw/refs/heads/master/examples/har_trees/recorder.py
-mpremote mip install https://raw.githubusercontent.com/emlearn/emlearn-micropython/refs/heads/master/examples/har_trees/color_setup.py
-mpremote mip install "github:peterhinch/micropython-nano-gui/drivers/st7789"
-mpremote mip install "github:peterhinch/micropython-nano-gui"
-mpremote mip install "github:peterhinch/micropython-async/v3/primitives"
 ```
 
 The GUI libraries are not used by the main firmware,
@@ -150,7 +146,7 @@ but is used by some of the tools like for data-recording.
 
 Copy the firmware files
 ```
-mpremote cp firmware/core.py firmware/process.py firmware/brushing.trees.csv firmware/main.py :
+mpremote cp firmware/core.py firmware/buzzer_music.py firmware/process.py firmware/brushing.trees.csv firmware/main.py :
 ```
 
 Start the application and observe log
@@ -181,6 +177,18 @@ micropython firmware/test_toothbrush.py
 TODO: check tests on device, document how to run
 ```
 
+#### Training model
+
+```
+TODO: document how to run
+See the GitHub Actions workflow in .github/workflows/build.yaml
+```
+
+#### Modifying dataset
+
+```
+See doc/data_collection.md
+```
 
 #### Porting to other devices
 

diff --git a/doc/TODO.md b/doc/TODO.md
@@ -1,17 +1,62 @@
 
 # TODO
 
-#### First PoC
+## Flex handle
+
+- Record new demo video.
+Previous version needed to use zipties, was quite large.
+Easy mounting onto toothbrush, without any tools.
+Tested on variety of brushes I could find in the store, from thinnest to the thickest.
+Printed in flexible TPU, using NinjaFlex
+
+## Zephyr firmware version
+
+Using XIAO BLE Sense NRF52840.
+Initially with MicroPython, but with an aim to have pure C version.
+
+#### Mini dataset v2
+
+- Get timeseries/video to work in LabelStudio
+- Record at least 3 sessions
+- Label all the sessions
+- Test data using the dataset and training pipelines
+- Improve the notes in [data_collection.md](./data_collection.md)
+
+#### Running in C
+
+Related
+https://github.com/jonnor/zephyr/blob/emlearn-sensor-readout/samples/modules/emlearn/sensor_reader/src/main.c
+
+- Implement gravity separation using eml_iir
+- Fixup the C feature extraction code.
+- Support running C feature extraction in pipeline. CSV, gcc, and subprocess
+- Setup/run evaluation pipeline on validation/testset on device. CSV
+- Test live predictions on device
+
+Later
+
+- Port LSM6DS3 FIFO driver to C/Zephyr
+- Use .npy instead of .csv
+- Holder. Try add in emlearn logo on bottom
+
+## Multi-participant dataset
+
+Ref [data_collection.md](./data_collection.md)
+
+- Get volunteers that are interested in participating
+- Do a trial run at home
+- Schedule a time to do the data recording
+
+
+## Pipeline improvements
 
-- Clean up the ML pipeline for dataset prep
-- Run unit-tests on device
-- Implement tests for sad case
-- Record more data. Get up to 8 or 10 sessions total.
 - Setup quantitative evaluation of the time tracking.
+Note, some starts in the notebooks.
 Cut out random selections of time-sections.
-Respect train/test folds
+Respect train/test folds.
+- Implement tests for state machine for sad case
 
-#### More features
+## More features
 
 Bluetooth connectivity
 

diff --git a/doc/data_collection.md b/doc/data_collection.md
@@ -76,13 +76,108 @@ NOTE: not used
 - Driving with X in backpack
 - Biking with X in backpack
 
+### Potential confusors
+
+Things that have similar data characteristics.
+Especially those that also can be expected to co-occur in ordinary real-world usage.
+
+Periodic alternating motion with 2-5 Hz.
+Fast walk / jogging / running?
+
+## Video recording
+
+Use "Open Camera" on Android phone.
+
+Video settings
+```
+Resolution                  720p
+Orientation.                Portrait
+Framerate.                  30 fps
+Format.                     MPEG4 H264
+Bitrate.                    3 Mbps
+```
+
+Estimated 25 MB per 1 minute, 75 MB per 3 minute session.
+Still just a few GB for 20 sessions.
+
+Going down to 1 Mbps gave noticeably worse results.
+
+! check that this opens nicely in Label Studio.
+
+## Dataset pipeline structure
+
+```
+Raw data:
+
+    Session metadata            /sessions.csv
+    Sensor data.                /har_record/$session/*.npy
+    Video from phone.           /videos/$session/X.mkv
+
+For labeling
+
+    Video. One video (URL) per session
+    Timeseries. One CSV per session
+    Task list. CSV/JSON with one row per session. URL for video and timeseries
+    Annotation template
+
+After labeling
+
+    Labels from Label Studio.   /labels/project.csv
+
+After combining
+
+    Combined data               /combined.parquet
+```
+
+## Session metadata
+
+```
+participant
+device
+location
+brush
+```
+
+
+## Video access
+
+! videos need to be on a URL to be accessible.
+Can be localhost if using Label Studio locally?
+
+Not all participants may allow open access to the video.
+Generally this should be something authenticated.
+Might need to use pre-signed / anonymous URLs.
+
+How to identify data in the Label Studio output?
+Session identifiers should be in the video/timeseries URLs
+
+# Aligning / time synchronization
+
+Open video, find location of the first sync (in seconds)
+
+??? Where does the time synchronization happen
+Would want to do before
+
+
+https://labelstud.io/templates/timeseries_audio_video
+! Must set frameRate
+
+
 
 # Labeling notes
 
 Labeling precision target.
-Better than 1 second. Not needed as good as 100 ms.
+Better than 1 second.
+Not needed as good as 100 ms.
+
+# Video notes
+
+Used this to get a more compressed video file.
+15 MB per 3 minutes.
 
-First target. 5 sessions, 1 subject.
+Recommendations by Label Studio
+https://labelstud.io/tags/video#Video-format
+! also specify -r 30 for constant-frame-rate
 
 ffmpeg -i input.avi -c:a copy -vf "scale=-2:720" -c:v libx264 -pix_fmt yuv420p -crf 23 output.mkv
 

diff --git a/doc/labeling.xml b/doc/labeling.xml
@@ -1,15 +1,40 @@
-<!-- Configuration for Label Studio -->
 <View>
-    <TimeSeries name="ts" valueType="url" value="$timeseriesUrl" sep="," timeColumn="elapsed" overviewWidth="30s" >
-        <!-- Data is expected to be in range -2G to +2G, each file up to 5 minutes long -->
-      <Channel column="brushing_energy" displayFormat=",.1f" strokeColor="#ff0000" legend="Y" dataRange="-0.0,+1.0"/>
-      <Channel column="brush_orientation" displayFormat=",.1f" strokeColor="#0000ff" legend="X" dataRange="-1.0,+1.0"/>
-    </TimeSeries>
+  <Video name="video" value="$video" frameRate="30" sync="group_a"/> <!-- Session video; shares sync group "group_a" with both TimeSeries views -->
 
-    <Header value="Classes:"/>
+  <TimeSeriesLabels name="timelinelabels" toName="motion"> <!-- Label set attached to the "motion" TimeSeries via toName -->
+    <Label value="sync"/>
+    <Label value="brushing"/>
+  </TimeSeriesLabels>  
+
+  <TimeSeries 
+              name="orientation"
+              value="$sensor_data"
+              sync="group_a"
+              timeColumn="time"
+              timeFormat="%H:%M:%S.%f"
+              timeDisplayFormat="%H:%M:%S.%f"
+              overviewWidth="10%"
+              fixedScale="true"
+  > <!-- Orientation view: "tilt" and "pitch" columns of $sensor_data, indexed by the "time" column -->
+    <MultiChannel>
+      <Channel column="tilt" strokeColor="#FF0000" height="100"/>
+      <Channel column="pitch" strokeColor="#00FF00" height="100"/>
+    </MultiChannel>
+  </TimeSeries>
 
-  	<TimeSeriesLabels name="label" toName="ts">
-      <Label value="docked"/>
-      <Label value="brushing"/>
-	</TimeSeriesLabels>
+  <TimeSeries 
+              name="motion" 
+              value="$sensor_data"
+              sync="group_a"
+              timeColumn="time"
+              timeFormat="%H:%M:%S.%f"
+              timeDisplayFormat="%H:%M:%S.%f"
+              overviewWidth="10%"
+              fixedScale="true"
+  > <!-- Motion view: linear_acc_x/y/z columns of the same $sensor_data; TimeSeriesLabels targets this series -->
+    <Channel column="linear_acc_x" strokeColor="#0000FF" height="100"/>
+    <Channel column="linear_acc_y" strokeColor="#FF00FF" height="100"/>
+    <Channel column="linear_acc_z" strokeColor="#00FF00" height="100"/>
+  </TimeSeries>
+
 </View>