diff --git a/training/object-detection/.dvc/.gitignore b/training/object-detection/.dvc/.gitignore new file mode 100644 index 0000000..528f30c --- /dev/null +++ b/training/object-detection/.dvc/.gitignore @@ -0,0 +1,3 @@ +/config.local +/tmp +/cache diff --git a/training/object-detection/.dvc/config b/training/object-detection/.dvc/config new file mode 100644 index 0000000..8b006d6 --- /dev/null +++ b/training/object-detection/.dvc/config @@ -0,0 +1,4 @@ +[core] + remote = storage +['remote "storage"'] + url = s3://salmonvision-dvc/rgb_object_detection diff --git a/training/object-detection/.dvcignore b/training/object-detection/.dvcignore new file mode 100644 index 0000000..5197305 --- /dev/null +++ b/training/object-detection/.dvcignore @@ -0,0 +1,3 @@ +# Add patterns of files dvc should ignore, which could improve +# the performance. Learn more at +# https://dvc.org/doc/user-guide/dvcignore diff --git a/training/object-detection/.gitignore b/training/object-detection/.gitignore new file mode 100644 index 0000000..e69de29 diff --git a/training/object-detection/.python-version b/training/object-detection/.python-version new file mode 100644 index 0000000..cc1923a --- /dev/null +++ b/training/object-detection/.python-version @@ -0,0 +1 @@ +3.8 diff --git a/training/object-detection/README.md b/training/object-detection/README.md new file mode 100644 index 0000000..21ab44c --- /dev/null +++ b/training/object-detection/README.md @@ -0,0 +1,35 @@ +# Object Detection + +Training pipeline to train the SalmonVision object detection model. + +Install uv: +```bash +curl -LsSf https://astral.sh/uv/install.sh | sh +``` + +Install DVC: +```bash +uv tool install dvc +``` + +Install the module: +```bash +uv pip install -e . +``` + +Check dvc.yaml for the full pipeline. 
+ +Run the following to run specific stages of the pipeline: +```bash +dvc repro stage_name +``` + +For example, building the model input annotations: +```bash +dvc repro build_model_input +``` + +Run tests with +``` +uv run pytest +``` diff --git a/training/object-detection/config/salmon_yolo.yaml b/training/object-detection/config/salmon_yolo.yaml new file mode 100644 index 0000000..d1601dc --- /dev/null +++ b/training/object-detection/config/salmon_yolo.yaml @@ -0,0 +1,28 @@ +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] +# Classes updated on 2026-02-12 +path: /training/export_combined_bear_kitwanga_yolo # dataset root dir +train: train.txt +val: val.txt +test: test.txt + +# Classes +names: + 0: Coho + 1: Bull + 2: Rainbow + 3: Sockeye + 4: Pink + 5: Whitefish + 6: Chinook + 7: Shiner + 8: Pikeminnow + 9: Chum + 10: Steelhead + 11: Lamprey + 12: Cutthroat + 13: Stickleback + 14: Sculpin + 15: Jack_Coho + 16: Jack_Chinook + 17: Otter + 18: Sucker diff --git a/training/object-detection/data/01_raw/.gitignore b/training/object-detection/data/01_raw/.gitignore new file mode 100644 index 0000000..767a4b6 --- /dev/null +++ b/training/object-detection/data/01_raw/.gitignore @@ -0,0 +1,4 @@ +/labelstudio_annos +/salmon_vid_counts.csv +/salmon_vid_counts_summary.csv +/sv_water_conditions diff --git a/training/object-detection/data/01_raw/salmon_vid_counts.csv.dvc b/training/object-detection/data/01_raw/salmon_vid_counts.csv.dvc new file mode 100644 index 0000000..1023678 --- /dev/null +++ b/training/object-detection/data/01_raw/salmon_vid_counts.csv.dvc @@ -0,0 +1,5 @@ +outs: +- md5: b6136eee30a358d7473a160197bc91be + size: 6465051 + hash: md5 + path: salmon_vid_counts.csv diff --git a/training/object-detection/data/01_raw/salmon_vid_counts_summary.csv.dvc b/training/object-detection/data/01_raw/salmon_vid_counts_summary.csv.dvc new file mode 100644 index 0000000..4f4cbb3 --- /dev/null +++ 
b/training/object-detection/data/01_raw/salmon_vid_counts_summary.csv.dvc @@ -0,0 +1,5 @@ +outs: +- md5: c65c2ba8214d26905ccb9608e9071b93 + size: 1031 + hash: md5 + path: salmon_vid_counts_summary.csv diff --git a/training/object-detection/data/01_raw/sv_water_conditions.dvc b/training/object-detection/data/01_raw/sv_water_conditions.dvc new file mode 100644 index 0000000..f4cce12 --- /dev/null +++ b/training/object-detection/data/01_raw/sv_water_conditions.dvc @@ -0,0 +1,6 @@ +outs: +- md5: 717ade5c2fd57a763609ab55d66bf351.dir + size: 199437 + nfiles: 10 + hash: md5 + path: sv_water_conditions diff --git a/training/object-detection/data/02_interim/.gitignore b/training/object-detection/data/02_interim/.gitignore new file mode 100644 index 0000000..c412ac4 --- /dev/null +++ b/training/object-detection/data/02_interim/.gitignore @@ -0,0 +1,3 @@ +/yolo_annos +/yolo_annos_unpacked +/yolo_condition_negatives diff --git a/training/object-detection/data/03_processed/.gitignore b/training/object-detection/data/03_processed/.gitignore new file mode 100644 index 0000000..15e8ec8 --- /dev/null +++ b/training/object-detection/data/03_processed/.gitignore @@ -0,0 +1 @@ +/splits_baseline diff --git a/training/object-detection/dvc.lock b/training/object-detection/dvc.lock new file mode 100644 index 0000000..76dccb0 --- /dev/null +++ b/training/object-detection/dvc.lock @@ -0,0 +1,143 @@ +schema: '2.0' +stages: + update_raw: + cmd: rclone sync -P --filter "- /salm_dataset*/**" --filter "+ *.json" + --filter "- *.zip" aws:salmonvision-ml-datasets/rgb/raw/ + data/01_raw/labelstudio_annos + deps: + - path: s3://salmonvision-ml-datasets/rgb/raw + hash: md5 + md5: 9aab20758f57f39971a38c9a676dad27.dir + size: 720238505 + nfiles: 65 + outs: + - path: data/01_raw/labelstudio_annos + hash: md5 + md5: 188b71c31be326fe36d209cc52c23337.dir + size: 379352848 + nfiles: 50 + split_data: + cmd: rm -rf data/02_interim/yolo_annos_unpacked && scripts/unpack_annos.sh + data/02_interim/yolo_annos 
data/02_interim/yolo_annos_unpacked && + scripts/unpack_annos.sh data/02_interim/yolo_condition_negatives + data/02_interim/yolo_annos_unpacked && scripts/make_splits.py + --labels-root data/02_interim/yolo_annos_unpacked --out-dir + data/03_processed/splits_baseline --sites tankeeah kitwanga bear --seed 42 + --train-frac 0.8 --val-frac 0.1 --test-frac 0.1 + deps: + - path: data/01_raw/salmon_vid_counts.csv + hash: md5 + md5: b6136eee30a358d7473a160197bc91be + size: 6465051 + - path: data/01_raw/salmon_vid_counts_summary.csv + hash: md5 + md5: c65c2ba8214d26905ccb9608e9071b93 + size: 1031 + - path: data/02_interim/yolo_annos + hash: md5 + md5: 88551957fa5e19f676175f29606a613a.dir + size: 308350474 + nfiles: 5 + - path: data/02_interim/yolo_condition_negatives + hash: md5 + md5: c3505d079516f38874bdb42d2182cb46.dir + size: 338928 + nfiles: 3 + - path: scripts/make_splits.py + hash: md5 + md5: 3921729fec8a5aa6c1eb25c54196a658 + size: 650 + - path: src/object_detection/splits + hash: md5 + md5: 8ca8d1d4f2962a0e27f6fd50da90a8d2.dir + size: 35847 + nfiles: 6 + - path: src/object_detection/utils + hash: md5 + md5: 5828afbf8a25f11a0f8e5bbc6c0065bf.dir + size: 762 + nfiles: 4 + outs: + - path: data/03_processed/splits_baseline + hash: md5 + md5: e821f72b0bf42a09afafb688ce927ac4.dir + size: 16966114 + nfiles: 5 + build_model_input: + cmd: scripts/yolo_converter_ls_video.py data/01_raw/labelstudio_annos + --data-yaml config/salmon_yolo.yaml --out data/02_interim/yolo_annos_fs + --out-shards data/02_interim/yolo_annos --empty-list + data/02_interim/yolo_annos/empty_vids.txt --shard-size 100000 --pattern + '**/*.json' --include-sites tankeeah kitwanga bear --frame-stride 3 + --frame-offset-mode video_hash --include-negatives --negative-ratio 0.10 + --negatives-per-video 11 + deps: + - path: config/salmon_yolo.yaml + hash: md5 + md5: f453f5dc54f1743eaedcc3ab117d269e + size: 547 + - path: data/01_raw/labelstudio_annos + hash: md5 + md5: 188b71c31be326fe36d209cc52c23337.dir + 
size: 379352848 + nfiles: 50 + - path: scripts/yolo_converter_ls_video.py + hash: md5 + md5: 95696514ec77c20c6491c5af2ccb46a5 + size: 118 + - path: src/object_detection/utils + hash: md5 + md5: 5828afbf8a25f11a0f8e5bbc6c0065bf.dir + size: 762 + nfiles: 4 + - path: src/object_detection/yolo_ls + hash: md5 + md5: 80ca7160d1912861bbc2a013a45ac5d5.dir + size: 52757 + nfiles: 10 + outs: + - path: data/02_interim/yolo_annos + hash: md5 + md5: 88551957fa5e19f676175f29606a613a.dir + size: 308350474 + nfiles: 5 + unpack_annos: + cmd: rm -r data/02_interim/yolo_annos_unpacked || scripts/unpack_annos.sh + data/02_interim/yolo_annos data/02_interim/yolo_annos_unpacked + deps: + - path: data/02_interim/yolo_annos + hash: md5 + md5: 473a8eedf5fd4cc5d5b8b6d1f80681e7.dir + size: 293713920 + nfiles: 3 + build_condition_negatives: + cmd: scripts/create_condition_negatives.py --conditions-csv + data/01_raw/sv_water_conditions/SV_conditions_tracking_tankeeah_2025.csv + data/01_raw/sv_water_conditions/SV_conditions_tracking_bear_2025.csv + data/01_raw/sv_water_conditions/SV_conditions_tracking_kitwanga_2025.csv + --out-dir data/02_interim/yolo_condition_negatives --frames-per-video 20 + --frame-stride 3 --frame-offset-mode video_hash --shard-size 100000 + deps: + - path: data/01_raw/sv_water_conditions + hash: md5 + md5: 717ade5c2fd57a763609ab55d66bf351.dir + size: 199437 + nfiles: 10 + - path: src/object_detection/negatives/cli.py + hash: md5 + md5: d04fcc8b316529ebefeec2830b288197 + size: 2142 + - path: src/object_detection/negatives/conditions.py + hash: md5 + md5: f037be2028fead4a5a9e88f95799254a + size: 18950 + - path: src/object_detection/yolo_ls/shards.py + hash: md5 + md5: 9a1e97420b95c978e10ec236c2645337 + size: 1266 + outs: + - path: data/02_interim/yolo_condition_negatives + hash: md5 + md5: c3505d079516f38874bdb42d2182cb46.dir + size: 338928 + nfiles: 3 diff --git a/training/object-detection/dvc.yaml b/training/object-detection/dvc.yaml new file mode 100644 index 
0000000..33ccbaf --- /dev/null +++ b/training/object-detection/dvc.yaml @@ -0,0 +1,142 @@ +stages: + update_raw: + cmd: >- + rclone sync + -P + --filter "- /salm_dataset*/**" + --filter "+ *.json" + --filter "- *.zip" + aws:salmonvision-ml-datasets/rgb/raw/ + data/01_raw/labelstudio_annos + deps: + - s3://salmonvision-ml-datasets/rgb/raw + outs: + - data/01_raw/labelstudio_annos + frozen: true + build_model_input: + cmd: >- + scripts/yolo_converter_ls_video.py + data/01_raw/labelstudio_annos + --data-yaml config/salmon_yolo.yaml + --out data/02_interim/yolo_annos_fs + --out-shards data/02_interim/yolo_annos + --empty-list data/02_interim/yolo_annos/empty_vids.txt + --shard-size 100000 + --pattern '**/*.json' + --include-sites tankeeah kitwanga bear + --frame-stride 3 + --frame-offset-mode video_hash + --include-negatives + --negative-ratio 0.10 + --negatives-per-video 11 + deps: + - scripts/yolo_converter_ls_video.py + - src/object_detection/yolo_ls + - src/object_detection/utils + - data/01_raw/labelstudio_annos + - config/salmon_yolo.yaml + outs: + - data/02_interim/yolo_annos + build_condition_negatives: + cmd: >- + scripts/create_condition_negatives.py + --conditions-csv data/01_raw/sv_water_conditions/SV_conditions_tracking_tankeeah_2025.csv + data/01_raw/sv_water_conditions/SV_conditions_tracking_bear_2025.csv + data/01_raw/sv_water_conditions/SV_conditions_tracking_kitwanga_2025.csv + --out-dir data/02_interim/yolo_condition_negatives + --frames-per-video 20 + --frame-stride 3 + --frame-offset-mode video_hash + --shard-size 100000 + deps: + - src/object_detection/negatives/conditions.py + - src/object_detection/negatives/cli.py + - src/object_detection/yolo_ls/shards.py + - data/01_raw/sv_water_conditions + outs: + - data/02_interim/yolo_condition_negatives + split_data: + cmd: >- + rm -rf data/02_interim/yolo_annos_unpacked && + scripts/unpack_annos.sh + data/02_interim/yolo_annos + data/02_interim/yolo_annos_unpacked && + scripts/unpack_annos.sh + 
data/02_interim/yolo_condition_negatives + data/02_interim/yolo_annos_unpacked && + scripts/make_splits.py + --labels-root data/02_interim/yolo_annos_unpacked + --out-dir data/03_processed/splits_baseline + --sites tankeeah kitwanga bear + --seed 42 + --train-frac 0.8 --val-frac 0.1 --test-frac 0.1 + deps: + - scripts/make_splits.py + - src/object_detection/splits + - src/object_detection/utils + - data/02_interim/yolo_annos + - data/02_interim/yolo_condition_negatives + - data/01_raw/salmon_vid_counts.csv + - data/01_raw/salmon_vid_counts_summary.csv + outs: + - data/03_processed/splits_baseline + build_video_metadata_index: + cmd: >- + scripts/build_video_metadata_index.py + --json-dir data/01_raw/labelstudio_annos + --out-csv data/02_interim/video_metadata_index.csv + deps: + - src/object_detection/metadata/index.py + - src/object_detection/metadata/cli.py + - data/01_raw/labelstudio_annos + outs: + - data/02_interim/video_metadata_index.csv + pack_split_dataset: + cmd: >- + scripts/pack_split_dataset.py + --splits-dir data/03_processed/splits_baseline + --labels-root data/02_interim/yolo_annos_unpacked + --shards-root ${config.drive}/salmon_dataset/dataset_sharded/shards + --manifests-root ${config.drive}/salmon_dataset/dataset_sharded/manifests + --temp-video-dir ${config.drive}/salmon_dataset/tmp_videos + --metadata-csv data/02_interim/video_metadata_index.csv + data/02_interim/yolo_condition_negatives/condition_negative_video_metadata.csv + --data-yaml config/salmon_yolo.yaml + --bucket prod-salmonvision-edge-assets-labelstudio-source + --image-ext .jpg + --manifest-csv data/03_processed/packed_dataset_manifest.csv + --splits train val test + --shard-size 100000 + deps: + - src/object_detection/frames/parsing.py + - src/object_detection/frames/extractor.py + - src/object_detection/frames/cli.py + - src/object_detection/yolo_ls/shards.py + - data/03_processed/splits_baseline + - data/02_interim/yolo_annos_unpacked + - data/02_interim/video_metadata_index.csv 
+ - data/02_interim/yolo_condition_negatives/condition_negative_video_metadata.csv + - config/salmon_yolo.yaml + params: + - config.drive + outs: + - ${config.drive}/salmon_dataset/dataset_sharded/shards + - ${config.drive}/salmon_dataset/dataset_sharded/manifests + - data/03_processed/packed_dataset_manifest.csv + unpack_split_dataset: + cmd: >- + rm -rf ${config.drive}/salmon_dataset/yolo_workdir && + mkdir -p ${config.drive}/salmon_dataset/yolo_workdir && + scripts/unpack_annos.sh + ${config.drive}/salmon_dataset/dataset_sharded/shards + ${config.drive}/salmon_dataset/yolo_workdir && + cp ${config.drive}/salmon_dataset/dataset_sharded/manifests/train.txt ${config.drive}/salmon_dataset/yolo_workdir/train.txt && + cp ${config.drive}/salmon_dataset/dataset_sharded/manifests/val.txt ${config.drive}/salmon_dataset/yolo_workdir/val.txt && + cp ${config.drive}/salmon_dataset/dataset_sharded/manifests/test.txt ${config.drive}/salmon_dataset/yolo_workdir/test.txt && + cp ${config.drive}/salmon_dataset/dataset_sharded/manifests/data.yaml ${config.drive}/salmon_dataset/yolo_workdir/data.yaml + deps: + - ${config.drive}/salmon_dataset/dataset_sharded/shards + - ${config.drive}/salmon_dataset/dataset_sharded/manifests + params: + - config.drive + frozen: true diff --git a/training/object-detection/main.py b/training/object-detection/main.py new file mode 100644 index 0000000..f376d2d --- /dev/null +++ b/training/object-detection/main.py @@ -0,0 +1,6 @@ +def main(): + print("Hello from object-detection!") + + +if __name__ == "__main__": + main() diff --git a/training/object-detection/params.yaml b/training/object-detection/params.yaml new file mode 100644 index 0000000..4f332bb --- /dev/null +++ b/training/object-detection/params.yaml @@ -0,0 +1,2 @@ +config: + drive: /mnt/harukassd4tb/masamim diff --git a/training/object-detection/pyproject.toml b/training/object-detection/pyproject.toml new file mode 100644 index 0000000..43fb8fb --- /dev/null +++ 
b/training/object-detection/pyproject.toml @@ -0,0 +1,26 @@ +[project] +name = "object-detection" +version = "0.1.0" +description = "SalmonVision object detector training" +readme = "README.md" +requires-python = ">=3.8" +dependencies = [ + "boto3>=1.37.38", + "pyyaml>=6.0.3", +] + +[dependency-groups] +dev = [ + "pytest>=8.3.5", +] + +[build-system] +requires = ["setuptools>=61"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = ["test_*.py"] diff --git a/training/object-detection/scripts/build_video_metadata_index.py b/training/object-detection/scripts/build_video_metadata_index.py new file mode 100755 index 0000000..38250cb --- /dev/null +++ b/training/object-detection/scripts/build_video_metadata_index.py @@ -0,0 +1,5 @@ +#!/usr/bin/env -S uv run python +from object_detection.metadata.cli import main + +if __name__ == "__main__": + main() diff --git a/training/object-detection/scripts/create_condition_negatives.py b/training/object-detection/scripts/create_condition_negatives.py new file mode 100755 index 0000000..547ddbb --- /dev/null +++ b/training/object-detection/scripts/create_condition_negatives.py @@ -0,0 +1,5 @@ +#!/usr/bin/env -S uv run python +from object_detection.negatives.cli import main + +if __name__ == "__main__": + main() diff --git a/training/object-detection/scripts/make_splits.py b/training/object-detection/scripts/make_splits.py new file mode 100755 index 0000000..3ca26da --- /dev/null +++ b/training/object-detection/scripts/make_splits.py @@ -0,0 +1,28 @@ +#!/usr/bin/env -S uv run python + +""" +make_splits.py + +Group-wise stratified-ish split for unpacked YOLO label files. 
+ +- Input: unpacked labels directory that looks like: + //frame_000123.txt + + where video_stem looks like: + ORG-site-device-id_YYYYMMDD_HHMMSS_M + +- Output: + out_dir/train.txt + out_dir/val.txt + out_dir/test.txt + out_dir/group_assignments.csv + out_dir/split_report.json + +Split unit: group_id = site + device + date(YYYYMMDD) +Balancing objectives (soft): class counts, time-of-day, density bins, box area bins. +""" + +from object_detection.splits.cli import main + +if __name__ == "__main__": + main() diff --git a/training/object-detection/scripts/pack_split_dataset.py b/training/object-detection/scripts/pack_split_dataset.py new file mode 100755 index 0000000..c4eb631 --- /dev/null +++ b/training/object-detection/scripts/pack_split_dataset.py @@ -0,0 +1,5 @@ +#!/usr/bin/env -S uv run python +from object_detection.frames.cli import main + +if __name__ == "__main__": + main() diff --git a/training/object-detection/scripts/unpack_annos.sh b/training/object-detection/scripts/unpack_annos.sh new file mode 100755 index 0000000..c78c455 --- /dev/null +++ b/training/object-detection/scripts/unpack_annos.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +set -e + +help_msg() { + echo "$0 [-h] in_folder out_folder" +} + +# Get the options +while getopts ":h" option; do + case $option in + h) # display Help + help_msg + exit;; + \?) 
# Invalid option + echo "Error: Invalid option" + help_msg + exit;; + esac +done + +# Check if exactly two arguments are given +if [ $# -ne 2 ]; then + help_msg + exit 1 +fi + +in_path="$1" +out_path="$2" + +mkdir -p "$out_path" +for f in "$in_path"/*.tar; do + tar -xf "$f" -C "$out_path" +done diff --git a/training/object-detection/scripts/yolo_converter_ls_video.py b/training/object-detection/scripts/yolo_converter_ls_video.py new file mode 100755 index 0000000..9c816a7 --- /dev/null +++ b/training/object-detection/scripts/yolo_converter_ls_video.py @@ -0,0 +1,6 @@ +#!/usr/bin/env -S uv run python + +from object_detection.yolo_ls.cli import main + +if __name__ == "__main__": + main() diff --git a/training/object-detection/src/object_detection/__init__.py b/training/object-detection/src/object_detection/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/training/object-detection/src/object_detection/frames/__init__.py b/training/object-detection/src/object_detection/frames/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/training/object-detection/src/object_detection/frames/cli.py b/training/object-detection/src/object_detection/frames/cli.py new file mode 100644 index 0000000..6c447fe --- /dev/null +++ b/training/object-detection/src/object_detection/frames/cli.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +import argparse +from pathlib import Path +import yaml + +from object_detection.frames.extractor import pack_split_dataset_shards + + +def load_class_names_from_yolo_yaml(path: Path): + data = yaml.safe_load(path.read_text(encoding="utf-8")) + names = data.get("names") + if isinstance(names, dict): + return [names[k] for k in sorted(names, key=lambda x: int(x))] + if isinstance(names, list): + return names + raise ValueError(f"Unsupported names format in {path}") + + +def build_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser(description="Pack split-aware YOLO dataset into tar shards.") + 
p.add_argument("--splits-dir", required=True, help="Directory containing train.txt / val.txt / test.txt") + p.add_argument("--labels-root", required=True, help="Root of unpacked label files referenced by split manifests") + p.add_argument("--shards-root", required=True, help="Output directory for packed tar shards") + p.add_argument("--manifests-root", required=True, help="Output directory for new image manifests + data.yaml") + p.add_argument("--temp-video-dir", required=True, help="Temporary directory for downloaded videos") + p.add_argument("--metadata-csv", nargs="+", required=True, + help="One or more metadata CSVs with columns including video_stem,fps,s3_key") + p.add_argument("--data-yaml", required=True, help="YOLO data.yaml used only to get class names") + p.add_argument("--bucket", default="prod-salmonvision-edge-assets-labelstudio-source", + help="Fallback bucket if metadata row lacks s3_key") + p.add_argument("--image-ext", default=".jpg", choices=[".jpg", ".png"]) + p.add_argument("--keep-videos", action="store_true") + p.add_argument("--manifest-csv", default=None) + p.add_argument("--splits", nargs="*", default=["train", "val", "test"]) + p.add_argument("--shard-size", type=int, default=100000) + return p + + +def main() -> None: + args = build_parser().parse_args() + class_names = load_class_names_from_yolo_yaml(Path(args.data_yaml)) + + stats = pack_split_dataset_shards( + splits_dir=Path(args.splits_dir), + labels_root=Path(args.labels_root), + shards_root=Path(args.shards_root), + manifests_root=Path(args.manifests_root), + temp_video_dir=Path(args.temp_video_dir), + metadata_csv_paths=[Path(p) for p in args.metadata_csv], + class_names=class_names, + bucket=args.bucket, + image_ext=args.image_ext, + cleanup_video=not args.keep_videos, + split_names=args.splits, + manifest_csv=Path(args.manifest_csv) if args.manifest_csv else None, + shard_size=args.shard_size, + ) + + print( + f"Done. 
splits_seen={stats.splits_seen} " + f"videos_seen={stats.videos_seen} " + f"videos_processed={stats.videos_processed} " + f"videos_failed={stats.videos_failed} " + f"frames_requested={stats.frames_requested} " + f"images_written={stats.images_written} " + f"labels_written={stats.labels_written}" + ) diff --git a/training/object-detection/src/object_detection/frames/extractor.py b/training/object-detection/src/object_detection/frames/extractor.py new file mode 100644 index 0000000..9b2860f --- /dev/null +++ b/training/object-detection/src/object_detection/frames/extractor.py @@ -0,0 +1,369 @@ +from __future__ import annotations + +import csv +import subprocess +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, Iterable, List, Optional, Set +import io +import tarfile + +from object_detection.yolo_ls.shards import TarShardWriter +from object_detection.frames.parsing import ( + parse_manifest_relpath, + split_label_relpath_to_packed_paths, + video_stem_to_s3_key, +) + +from object_detection.utils.utils import safe_float + + +@dataclass +class ExtractionStats: + splits_seen: int = 0 + videos_seen: int = 0 + videos_processed: int = 0 + videos_failed: int = 0 + frames_requested: int = 0 + frames_written: int = 0 + labels_written: int = 0 + +def load_video_metadata_index(path: Path) -> Dict[str, Dict[str, str]]: + out: Dict[str, Dict[str, str]] = {} + with path.open("r", newline="", encoding="utf-8") as f: + reader = csv.DictReader(f) + for row in reader: + video_stem = (row.get("video_stem") or "").strip() + if not video_stem: + continue + out[video_stem] = dict(row) + return out + +def merge_video_metadata_csvs(paths: Iterable[Path]) -> Dict[str, Dict[str, str]]: + """ + Merge metadata CSVs by video_stem. + Later CSVs overwrite earlier CSVs on conflicts. 
+ """ + merged: Dict[str, Dict[str, str]] = {} + for path in paths: + current = load_video_metadata_index(path) + for video_stem, row in current.items(): + if video_stem in merged: + prev = merged[video_stem] + if prev.get("fps") != row.get("fps") or prev.get("s3_key") != row.get("s3_key"): + print( + f"[frames] warning: overriding metadata for {video_stem} " + f"from fps={prev.get('fps')} s3_key={prev.get('s3_key')} " + f"to fps={row.get('fps')} s3_key={row.get('s3_key')}" + ) + merged[video_stem] = row + return merged + +def ensure_dir(path: Path) -> None: + path.mkdir(parents=True, exist_ok=True) + + +def read_split_manifest(path: Path) -> List[str]: + lines: List[str] = [] + for line in path.read_text(encoding="utf-8").splitlines(): + s = line.strip() + if s: + lines.append(s) + return lines + + +def write_split_manifests( + manifests_root: Path, + split_to_image_relpaths: Dict[str, List[str]], +) -> None: + ensure_dir(manifests_root) + for split, relpaths in split_to_image_relpaths.items(): + out_path = manifests_root / f"{split}.txt" + relpaths = sorted(relpaths) + out_path.write_text("\n".join(relpaths) + ("\n" if relpaths else ""), encoding="utf-8") + + +def write_data_yaml( + manifests_root: Path, + class_names: List[str], +) -> None: + """ + Writes a YOLO-style data.yaml that uses split manifest files. 
+ """ + lines = [ + f"train: {str((manifests_root / 'train.txt').resolve())}", + f"val: {str((manifests_root / 'val.txt').resolve())}", + f"test: {str((manifests_root / 'test.txt').resolve())}", + "names:", + ] + for idx, name in enumerate(class_names): + lines.append(f" {idx}: {name}") + (manifests_root / "data.yaml").write_text("\n".join(lines) + "\n", encoding="utf-8") + + +def load_split_requests(splits_dir: Path, split_names: Iterable[str]) -> Dict[str, Dict[str, List[int]]]: + """ + Returns: + { + "train": {video_stem: [frame_idx, ...], ...}, + "val": {...}, + "test": {...}, + } + """ + out: Dict[str, Dict[str, List[int]]] = {} + + for split in split_names: + manifest = splits_dir / f"{split}.txt" + if not manifest.exists(): + continue + + by_video: Dict[str, List[int]] = {} + for line in read_split_manifest(manifest): + video_stem, frame_idx = parse_manifest_relpath(line) + by_video.setdefault(video_stem, []).append(frame_idx) + + # dedupe + sort + by_video = {k: sorted(set(v)) for k, v in by_video.items()} + out[split] = by_video + + return out + + +def download_s3_video(bucket: str, s3_key: str, local_video_path: Path) -> None: + ensure_dir(local_video_path.parent) + cmd = [ + "aws", "s3", "cp", + f"s3://{bucket}/{s3_key}", + str(local_video_path), + ] + subprocess.run(cmd, check=True) + + +def read_label_text(labels_root: Path, relpath: str) -> str: + path = labels_root / relpath + return path.read_text(encoding="utf-8") + + +def extract_frame_bytes_ffmpeg( + video_path: Path, + frame_idx: int, + fps: float, + image_ext: str = ".jpg", +) -> bytes: + """ + Extract one frame and return the encoded image bytes. 
+ """ + timestamp = frame_idx / float(fps) + + if image_ext == ".jpg": + codec_args = ["-f", "image2", "-vcodec", "mjpeg"] + elif image_ext == ".png": + codec_args = ["-f", "image2", "-vcodec", "png"] + else: + raise ValueError(f"Unsupported image_ext: {image_ext}") + + cmd = [ + "ffmpeg", + "-hide_banner", + "-loglevel", "error", + "-ss", f"{timestamp:.6f}", + "-i", str(video_path), + "-frames:v", "1", + ] + codec_args + ["pipe:1"] + + result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE) + return result.stdout + + +def extract_frame_ffmpeg( + video_path: Path, + frame_idx: int, + fps: float, + output_path: Path, + overwrite: bool = False, +) -> bool: + """ + Extract one frame using timestamp = frame_idx / fps. + """ + if output_path.exists() and not overwrite: + return False + + ensure_dir(output_path.parent) + timestamp = frame_idx / float(fps) + + cmd = [ + "ffmpeg", + "-hide_banner", + "-loglevel", "error", + "-ss", f"{timestamp:.6f}", + "-i", str(video_path), + "-frames:v", "1", + "-q:v", "2", + "-y" if overwrite else "-n", + str(output_path), + ] + subprocess.run(cmd, check=True) + return True + + +def pack_split_dataset_shards( + splits_dir: Path, + labels_root: Path, + shards_root: Path, + manifests_root: Path, + temp_video_dir: Path, + metadata_csv_paths: Iterable[Path], + class_names: List[str], + bucket: str, + image_ext: str = ".jpg", + cleanup_video: bool = True, + split_names: Iterable[str] = ("train", "val", "test"), + manifest_csv: Optional[Path] = None, + shard_size: int = 100000, +) -> ExtractionStats: + """ + Reads split manifests containing label relpaths, e.g. + HIRMD-.../frame_000123.txt + + Produces sharded paired dataset: + train//frame_000123.jpg + train//frame_000123.txt + ... 
+ + Also writes fresh split manifests that point to image relpaths inside the packed layout: + train/HIRMD-.../frame_000123.jpg + """ + split_requests = load_split_requests(splits_dir, split_names) + stats = ExtractionStats(splits_seen=len(split_requests)) + metadata_index = merge_video_metadata_csvs(metadata_csv_paths) + + ensure_dir(shards_root) + ensure_dir(manifests_root) + + shard_writers: Dict[str, TarShardWriter] = {} + for split in split_requests.keys(): + shard_writers[split] = TarShardWriter( + shards_root, + shard_size=shard_size, + prefix=split, + ) + + split_to_image_relpaths: Dict[str, List[str]] = {split: [] for split in split_requests.keys()} + manifest_rows: List[Dict[str, str]] = [] + + for split, by_video in split_requests.items(): + writer = shard_writers[split] + + for video_stem, frame_indices in by_video.items(): + stats.videos_seen += 1 + stats.frames_requested += len(frame_indices) + + local_video = temp_video_dir / f"{video_stem}.mp4" + s3_key = "" + fps = 0.0 + + try: + meta = metadata_index.get(video_stem) + if meta is None: + raise KeyError(f"Missing metadata for video_stem={video_stem}") + + fps = safe_float(meta.get("fps", ""), 0.0) + if fps <= 0: + raise ValueError(f"Invalid fps for video_stem={video_stem}: {meta.get('fps', '')!r}") + + s3_key = (meta.get("s3_key") or "").strip() + if not s3_key: + if not bucket: + raise ValueError(f"Missing s3_key for video_stem={video_stem}") + s3_key = video_stem_to_s3_key(video_stem) + + download_s3_video(bucket=bucket, s3_key=s3_key, local_video_path=local_video) + + for frame_idx in frame_indices: + label_relpath = f"{video_stem}/frame_{frame_idx:06d}.txt" + image_relpath, packed_label_relpath = split_label_relpath_to_packed_paths( + split=split, + relpath=label_relpath, + image_ext=image_ext, + ) + + image_bytes = extract_frame_bytes_ffmpeg( + video_path=local_video, + frame_idx=frame_idx, + fps=fps, + image_ext=image_ext, + ) + label_text = read_label_text(labels_root, label_relpath) + + 
writer.write_bytes(str(image_relpath), image_bytes) + split_to_image_relpaths[split].append(str(image_relpath)) + + stats.images_written += 1 + + writer.write_text(str(packed_label_relpath), label_text) + stats.labels_written += 1 + + stats.videos_processed += 1 + + manifest_rows.append({ + "split": split, + "video_stem": video_stem, + "s3_key": s3_key, + "fps": str(fps), + "requested_frames": str(len(frame_indices)), + "images_written": str(len(frame_indices)), + "labels_written": str(len(frame_indices)), + "status": "ok", + "error": "", + }) + + except Exception as e: + stats.videos_failed += 1 + manifest_rows.append({ + "split": split, + "video_stem": video_stem, + "s3_key": s3_key, + "fps": str(fps) if fps > 0 else "", + "requested_frames": str(len(frame_indices)), + "images_written": "0", + "labels_written": "0", + "status": "error", + "error": repr(e), + }) + + finally: + if cleanup_video: + try: + if local_video.exists(): + local_video.unlink() + except Exception: + pass + + for writer in shard_writers.values(): + writer.close() + + write_split_manifests(manifests_root, split_to_image_relpaths) + write_data_yaml(manifests_root, class_names) + + if manifest_csv is not None: + ensure_dir(manifest_csv.parent) + with manifest_csv.open("w", newline="", encoding="utf-8") as f: + w = csv.DictWriter( + f, + fieldnames=[ + "split", + "video_stem", + "s3_key", + "fps", + "requested_frames", + "images_written", + "labels_written", + "status", + "error", + ], + ) + w.writeheader() + for row in manifest_rows: + w.writerow(row) + + return stats diff --git a/training/object-detection/src/object_detection/frames/parsing.py b/training/object-detection/src/object_detection/frames/parsing.py new file mode 100644 index 0000000..3685b8d --- /dev/null +++ b/training/object-detection/src/object_detection/frames/parsing.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import re +from pathlib import Path +from typing import Dict, Optional, Tuple + +from 
# NOTE(review): chunk seam — the fragment below was the tail of
# "from object_detection.utils.utils import parse_video_stem".  The import is
# now performed lazily inside video_stem_to_s3_key() so the pure path helpers
# in this module are importable without the full package installed.  (If any
# caller re-exported parse_video_stem from this module, restore the top-level
# import — TODO confirm.)
# object_detection.utils.utils import parse_video_stem


def split_label_relpath_to_packed_paths(
    split: str,
    relpath: str,
    image_ext: str = ".jpg",
) -> Tuple[Path, Path]:
    """
    Convert a split manifest label entry like:
        HIRMD-tankeeah-jetson-0_20250714_012827_M/frame_000123.txt

    into packed dataset paths:
        train/HIRMD-tankeeah-jetson-0_20250714_012827_M/frame_000123.jpg
        train/HIRMD-tankeeah-jetson-0_20250714_012827_M/frame_000123.txt

    Returns:
        (image_relpath, label_relpath), both prefixed with `split`.
    """
    p = Path(relpath.strip())
    label_rel = Path(split) / p
    image_rel = label_rel.with_suffix(image_ext)
    return image_rel, label_rel


def parse_frame_idx(label_filename: str) -> Optional[int]:
    """Return N from a 'frame_NNNNNN.txt' filename, or None if it does not match."""
    m = re.match(r"^frame_(\d+)\.txt$", label_filename)
    if not m:
        return None
    # The regex guarantees the captured group is all digits, so int() cannot
    # fail; the try/except that wrapped it was dead code and has been removed.
    return int(m.group(1))


def video_stem_to_s3_key(video_stem: str) -> str:
    """
    Map a video stem to its motion-video S3 key:
        <org>/<site>/<device>/motion_vids/<video_stem>.mp4

    Raises:
        ValueError: if the stem does not parse into org/site/device fields.
    """
    # Imported lazily; see module note above.
    from object_detection.utils.utils import parse_video_stem

    meta = parse_video_stem(video_stem)
    if meta is None:
        raise ValueError(f"Could not parse video stem: {video_stem}")
    return f"{meta['org']}/{meta['site']}/{meta['device']}/motion_vids/{video_stem}.mp4"


def parse_manifest_relpath(relpath: str) -> Tuple[str, int]:
    """
    Input line example:
        HIRMD-tankeeah-jetson-0_20250714_012827_M/frame_000123.txt
    Returns:
        (video_stem, frame_idx)
    Raises:
        ValueError: if the entry has no video-stem directory component or the
        filename is not of the form frame_NNNNNN.txt.
    """
    p = Path(relpath.strip())
    if len(p.parts) < 2:
        raise ValueError(f"Invalid manifest relpath: {relpath}")

    video_stem = p.parts[0]
    frame_idx = parse_frame_idx(p.name)
    if frame_idx is None:
        raise ValueError(f"Invalid frame filename: {p.name}")

    return video_stem, frame_idx


def label_relpath_to_image_relpath(relpath: str, image_ext: str = ".jpg") -> Path:
    """Swap a label relpath's extension (.txt) for the image extension."""
    p = Path(relpath.strip())
    return p.with_suffix(image_ext)

# [diff residue] diff --git a/training/object-detection/src/object_detection/metadata/__init__.py
# b/training/object-detection/src/object_detection/metadata/__init__.py
# new file mode 100644 index 0000000..e69de29
# [diff residue] diff --git a/training/object-detection/src/object_detection/metadata/cli.py
b/training/object-detection/src/object_detection/metadata/cli.py new file mode 100644 index 0000000..1cc3942 --- /dev/null +++ b/training/object-detection/src/object_detection/metadata/cli.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +import argparse +from pathlib import Path + +from object_detection.metadata.index import build_video_metadata_index, write_video_metadata_index + + +def main() -> None: + p = argparse.ArgumentParser(description="Build per-video metadata index from Label Studio task JSONs.") + p.add_argument("--json-dir", required=True) + p.add_argument("--out-csv", required=True) + args = p.parse_args() + + rows = build_video_metadata_index(Path(args.json_dir)) + write_video_metadata_index(rows, Path(args.out_csv)) + + print(f"Done. indexed_videos={len(rows)} out={args.out_csv}") diff --git a/training/object-detection/src/object_detection/metadata/index.py b/training/object-detection/src/object_detection/metadata/index.py new file mode 100644 index 0000000..69d3a72 --- /dev/null +++ b/training/object-detection/src/object_detection/metadata/index.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +import csv +import json +from pathlib import Path +from typing import Any, Dict, Iterable, List, Optional + + +def safe_float(v: Any, default: float = 0.0) -> float: + try: + return float(v) + except Exception: + return default + + +def parse_ffmpeg_rate(rate: Any) -> float: + if rate is None: + return 0.0 + if isinstance(rate, (int, float)): + return float(rate) + + s = str(rate).strip() + if "/" in s: + a, b = s.split("/", 1) + try: + num = float(a) + den = float(b) + return num / den if den else 0.0 + except Exception: + return 0.0 + try: + return float(s) + except Exception: + return 0.0 + + +def infer_fps(data: Dict[str, Any]) -> float: + fps = safe_float(data.get("frames_per_second"), 0.0) + if fps > 0: + return fps + + fps = parse_ffmpeg_rate(data.get("metadata_video_r_frame_rate")) + if fps > 0: + return fps + + fps = 
parse_ffmpeg_rate(data.get("metadata_video_avg_frame_rate")) + if fps > 0: + return fps + + duration = safe_float(data.get("metadata_video_duration", data.get("duration")), 0.0) + nb_frames = int(safe_float(data.get("metadata_video_nb_frames"), 0)) + if duration > 0 and nb_frames > 0: + return nb_frames / duration + + return 0.0 + + +def infer_s3_key(data: Dict[str, Any], video_stem: str) -> str: + org = data.get("metadata_file_organization_reference_string", "") + site = data.get("metadata_file_site_reference_string", "") + cam = data.get("metadata_file_camera_reference_string", "") + if org and site and cam: + return f"{org}/{site}/{cam}/motion_vids/{video_stem}.mp4" + return "" + + +def iter_task_items(json_dir: Path, pattern: str = "**/*.json") -> Iterable[Dict[str, Any]]: + for path in sorted(json_dir.glob(pattern)): + try: + payload = json.loads(path.read_text(encoding="utf-8")) + except Exception: + continue + + if isinstance(payload, dict): + yield payload + elif isinstance(payload, list): + for item in payload: + if isinstance(item, dict): + yield item + + +def build_video_metadata_index(json_dir: Path) -> List[Dict[str, str]]: + rows: Dict[str, Dict[str, str]] = {} + + for item in iter_task_items(json_dir): + data = item.get("data") or {} + filename = data.get("metadata_file_filename") or data.get("video") or "" + video_stem = Path(filename).stem + if not video_stem: + continue + + row = { + "video_stem": video_stem, + "fps": str(infer_fps(data)), + "nb_frames": str(int(safe_float(data.get("metadata_video_nb_frames"), 0))), + "duration": str(safe_float(data.get("metadata_video_duration", data.get("duration")), 0.0)), + "width": str(int(safe_float(data.get("metadata_video_width"), 0))), + "height": str(int(safe_float(data.get("metadata_video_height"), 0))), + "org": str(data.get("metadata_file_organization_reference_string", "")), + "site": str(data.get("metadata_file_site_reference_string", "")), + "device": 
str(data.get("metadata_file_camera_reference_string", "")), + "s3_key": infer_s3_key(data, video_stem), + } + rows[video_stem] = row + + return list(rows.values()) + + +def write_video_metadata_index(rows: List[Dict[str, str]], out_csv: Path) -> None: + out_csv.parent.mkdir(parents=True, exist_ok=True) + fieldnames = [ + "video_stem", + "fps", + "nb_frames", + "duration", + "width", + "height", + "org", + "site", + "device", + "s3_key", + ] + with out_csv.open("w", newline="", encoding="utf-8") as f: + w = csv.DictWriter(f, fieldnames=fieldnames) + w.writeheader() + for row in rows: + w.writerow(row) + diff --git a/training/object-detection/src/object_detection/negatives/cli.py b/training/object-detection/src/object_detection/negatives/cli.py new file mode 100644 index 0000000..4d2aa4a --- /dev/null +++ b/training/object-detection/src/object_detection/negatives/cli.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +import argparse +from pathlib import Path + +from object_detection.negatives.conditions import create_condition_negative_shards + + +def build_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser(description="Create condition-balanced negative YOLO label shards from water-conditions CSVs") + p.add_argument("--conditions-csv", nargs="+", required=True, help="One or more water-conditions CSV files") + p.add_argument("--out-dir", required=True, help="Output directory for negative shards and manifests") + p.add_argument("--bucket", default="prod-salmonvision-edge-assets-labelstudio-source") + p.add_argument("--frames-per-video", type=int, default=5) + p.add_argument("--frame-stride", type=int, default=3) + p.add_argument("--frame-offset-mode", choices=["fixed", "video_hash"], default="video_hash") + p.add_argument("--frame-offset", type=int, default=0) + p.add_argument("--shard-size", type=int, default=100000) + p.add_argument("--negative-seed", type=int, default=42) + p.add_argument("--result-type", default="videorectangle") + 
p.add_argument("--from-name", default=None) + p.add_argument("--to-name", default=None) + p.add_argument("--aws-profile", default=None) + p.add_argument("--cache-task-json-dir", default=None) + return p + + +def main() -> None: + args = build_parser().parse_args() + + summary = create_condition_negative_shards( + csv_paths=[Path(p) for p in args.conditions_csv], + out_dir=Path(args.out_dir), + bucket=args.bucket, + frames_per_video=args.frames_per_video, + frame_stride=args.frame_stride, + frame_offset_mode=args.frame_offset_mode, + frame_offset=args.frame_offset, + shard_size=args.shard_size, + negative_seed=args.negative_seed, + result_type=args.result_type, + from_name=args.from_name, + to_name=args.to_name, + aws_profile=args.aws_profile, + cache_task_json_dir=Path(args.cache_task_json_dir) if args.cache_task_json_dir else None, + ) + + print( + f"Done. input_rows={summary['input_rows']} " + f"selected_videos={summary['written_videos']} " + f"written_negative_frames={summary['written_negative_frames']} " + f"failures={len(summary['failures'])}" + ) diff --git a/training/object-detection/src/object_detection/negatives/conditions.py b/training/object-detection/src/object_detection/negatives/conditions.py new file mode 100644 index 0000000..d68b7c8 --- /dev/null +++ b/training/object-detection/src/object_detection/negatives/conditions.py @@ -0,0 +1,706 @@ +from __future__ import annotations + +import csv +import json +import random +from collections import Counter +from dataclasses import asdict, dataclass +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple + +import boto3 + +from object_detection.utils.utils import safe_float +from object_detection.yolo_ls.shards import TarShardWriter + + +EXCLUDED_COLUMNS = { + "Project", + "Site", + "Camera", + "Filename", # Label Studio task ID, not video filename + "Date", + "Time", + "Notes:", + "Image Link:", +} + +DEFAULT_BUCKET = 
"prod-salmonvision-edge-assets-labelstudio-source" + + +@dataclass(frozen=True) +class ConditionRow: + project: str + site: str + camera: str + labelstudio_task_id: str + date: str + time: str + video_stem: str + s3_key: str + conditions: Dict[str, str] + source_csv: str + + +@dataclass +class VideoNegativeSample: + video_stem: str + s3_key: str + sampled_frames: List[int] + total_frames: int + positive_frames: int + eligible_negative_frames: int + conditions: Dict[str, str] + source_csv: str + + +@dataclass +class VideoMetadataRecord: + video_stem: str + s3_key: str + fps: float + nb_frames: int + duration: float + width: int + height: int + org: str + site: str + device: str + source_csv: str + + +def normalize_value(v: Any) -> Optional[str]: + if v is None: + return None + s = str(v).strip() + if not s or s.upper() == "NA": + return None + return s + + +def normalize_date(date_str: str) -> str: + dt = datetime.strptime(date_str.strip(), "%Y-%m-%d") + return dt.strftime("%Y%m%d") + + +def normalize_time(time_str: str) -> str: + # Handles "3:17:05" and "03:17:05" + dt = datetime.strptime(time_str.strip(), "%H:%M:%S") + return dt.strftime("%H%M%S") + + +def construct_video_stem(project: str, site: str, camera: str, date_str: str, time_str: str) -> str: + return f"{project}-{site}-{camera}_{normalize_date(date_str)}_{normalize_time(time_str)}_M" + + +def construct_task_s3_key(project: str, site: str, camera: str, video_stem: str) -> str: + return f"{project}/{site}/{camera}/labelstudio_tasks/{video_stem}.json" + + +def infer_condition_columns(fieldnames: Sequence[str]) -> List[str]: + cols: List[str] = [] + for name in fieldnames: + if name is None: + continue + s = name.strip() + if not s: + continue + if s in EXCLUDED_COLUMNS: + continue + cols.append(s) + return cols + + +def load_condition_rows(csv_paths: Sequence[Path]) -> Tuple[List[ConditionRow], List[str]]: + rows: List[ConditionRow] = [] + all_fieldnames: List[str] = [] + + for csv_path in csv_paths: + with 
csv_path.open("r", newline="", encoding="utf-8-sig") as f: + reader = csv.DictReader(f) + if reader.fieldnames: + for fn in reader.fieldnames: + if fn not in all_fieldnames: + all_fieldnames.append(fn) + + for raw in reader: + project = normalize_value(raw.get("Project")) + site = normalize_value(raw.get("Site")) + camera = normalize_value(raw.get("Camera")) + labelstudio_task_id = normalize_value(raw.get("Filename")) + date_str = normalize_value(raw.get("Date")) + time_str = normalize_value(raw.get("Time")) + + # Skip blank rows and placeholder rows like "NA" + if not project or not site or not camera or not date_str or not time_str: + continue + if not labelstudio_task_id: + continue + + try: + video_stem = construct_video_stem(project, site, camera, date_str, time_str) + except ValueError: + continue + + condition_values: Dict[str, str] = {} + for col in infer_condition_columns(reader.fieldnames or []): + v = normalize_value(raw.get(col)) + if v is not None: + condition_values[col] = v + + row = ConditionRow( + project=project, + site=site, + camera=camera, + labelstudio_task_id=labelstudio_task_id, + date=date_str, + time=time_str, + video_stem=video_stem, + s3_key=construct_task_s3_key(project, site, camera, video_stem), + conditions=condition_values, + source_csv=str(csv_path), + ) + rows.append(row) + + # Dedupe by real video stem + dedup: Dict[str, ConditionRow] = {} + for row in rows: + dedup[row.video_stem] = row + + deduped = list(dedup.values()) + condition_columns = infer_condition_columns(all_fieldnames) + return deduped, condition_columns + + +def active_condition_columns(rows: Sequence[ConditionRow], condition_columns: Sequence[str]) -> List[str]: + keep: List[str] = [] + for col in condition_columns: + vals = sorted({r.conditions[col] for r in rows if col in r.conditions}) + if len(vals) >= 2: + keep.append(col) + return keep + + +def compute_condition_targets( + rows: Sequence[ConditionRow], + condition_columns: Sequence[str], +) -> 
Tuple[Dict[Tuple[str, str], int], Dict[str, Counter]]: + per_col_counts: Dict[str, Counter] = {} + targets: Dict[Tuple[str, str], int] = {} + + for col in condition_columns: + c = Counter() + for row in rows: + if col in row.conditions: + c[row.conditions[col]] += 1 + if not c: + continue + + per_col_counts[col] = c + target = min(c.values()) + for value in c: + targets[(col, value)] = target + + return targets, per_col_counts + + +def greedy_select_balanced_rows( + rows: Sequence[ConditionRow], + condition_columns: Sequence[str], +) -> Tuple[List[ConditionRow], Dict[Tuple[str, str], int], Dict[str, Counter]]: + """ + Greedy marginal balancing: + - each condition column is balanced independently to its rarest category count + - rows that satisfy multiple deficits are preferred + """ + targets, per_col_counts = compute_condition_targets(rows, condition_columns) + deficits = dict(targets) + + remaining = list(rows) + selected: List[ConditionRow] = [] + + def row_score(row: ConditionRow) -> int: + score = 0 + for col in condition_columns: + val = row.conditions.get(col) + if val is None: + continue + score += max(deficits.get((col, val), 0), 0) + return score + + while True: + best_row = None + best_score = 0 + + for row in remaining: + score = row_score(row) + if score > best_score: + best_score = score + best_row = row + + if best_row is None or best_score <= 0: + break + + selected.append(best_row) + remaining.remove(best_row) + + for col in condition_columns: + val = best_row.conditions.get(col) + if val is None: + continue + key = (col, val) + if key in deficits and deficits[key] > 0: + deficits[key] -= 1 + + return selected, targets, per_col_counts + + +def parse_ts(s: str) -> datetime: + return datetime.fromisoformat(s.replace("Z", "+00:00")) + + +def parse_ffmpeg_rate(rate: Any) -> float: + if rate is None: + return 0.0 + if isinstance(rate, (int, float)): + return float(rate) + + s = str(rate).strip() + if "/" in s: + a, b = s.split("/", 1) + try: + num = 
float(a) + den = float(b) + return num / den if den else 0.0 + except Exception: + return 0.0 + try: + return float(s) + except Exception: + return 0.0 + + +def infer_fps(data: Dict[str, Any]) -> float: + fps = safe_float(data.get("frames_per_second"), 0.0) + if fps > 0: + return fps + + fps = parse_ffmpeg_rate(data.get("metadata_video_r_frame_rate")) + if fps > 0: + return fps + + fps = parse_ffmpeg_rate(data.get("metadata_video_avg_frame_rate")) + if fps > 0: + return fps + + duration = safe_float(data.get("metadata_video_duration", data.get("duration", 0.0)), 0.0) + nb_frames = int(safe_float(data.get("metadata_video_nb_frames"), 0)) + if duration > 0 and nb_frames > 0: + return nb_frames / duration + + return 0.0 + + +def extract_video_metadata_record( + item: dict, + *, + video_stem: str, + s3_key: str, + source_csv: str, +) -> VideoMetadataRecord: + data = item.get("data") or {} + + return VideoMetadataRecord( + video_stem=video_stem, + s3_key=s3_key, + fps=infer_fps(data), + nb_frames=int(safe_float(data.get("metadata_video_nb_frames"), 0)), + duration=safe_float(data.get("metadata_video_duration", data.get("duration", 0.0)), 0.0), + width=int(safe_float(data.get("metadata_video_width"), 0)), + height=int(safe_float(data.get("metadata_video_height"), 0)), + org=str(data.get("metadata_file_organization_reference_string", "")), + site=str(data.get("metadata_file_site_reference_string", "")), + device=str(data.get("metadata_file_camera_reference_string", "")), + source_csv=source_csv, + ) + + +def cache_task_json(task_json: Any, cache_root: Path, s3_key: str) -> Path: + out_path = cache_root / s3_key + out_path.parent.mkdir(parents=True, exist_ok=True) + out_path.write_text(json.dumps(task_json, indent=2, sort_keys=True) + "\n", encoding="utf-8") + return out_path + + +def interpolate_sequence(seq: Iterable[dict]) -> Dict[int, List[Tuple[float, float, float, float]]]: + kfs = sorted(seq, key=lambda k: int(safe_float(k.get("frame"), 0))) + frames_boxes: 
Dict[int, List[Tuple[float, float, float, float]]] = {}
+    # (continuation of interpolate_sequence: maps frame index -> list of
+    # (x, y, width, height) boxes, in Label Studio percent coordinates —
+    # units assumed from LS export conventions, TODO confirm)
+
+    if not kfs:
+        return frames_boxes
+
+    # Every keyframe with a valid frame number contributes its own box,
+    # regardless of its "enabled" flag.
+    # NOTE(review): if a disabled keyframe marks the END of a track in Label
+    # Studio, this counts that frame as occupied anyway — confirm intended.
+    for k in kfs:
+        f = int(safe_float(k.get("frame"), -1))
+        if f < 0:
+            continue
+        x = safe_float(k.get("x"))
+        y = safe_float(k.get("y"))
+        w = safe_float(k.get("width"))
+        h = safe_float(k.get("height"))
+        frames_boxes.setdefault(f, []).append((x, y, w, h))
+
+    # Linear interpolation of boxes strictly between consecutive keyframes;
+    # skipped when the earlier keyframe is disabled (a gap in the track).
+    for i in range(len(kfs) - 1):
+        k0 = kfs[i]
+        k1 = kfs[i + 1]
+
+        f0 = int(safe_float(k0.get("frame"), -1))
+        f1 = int(safe_float(k1.get("frame"), -1))
+        if f0 < 0 or f1 <= f0:
+            continue
+
+        enabled0 = bool(k0.get("enabled", True))
+        if not enabled0:
+            continue
+
+        x0 = safe_float(k0.get("x"))
+        y0 = safe_float(k0.get("y"))
+        w0 = safe_float(k0.get("width"))
+        h0 = safe_float(k0.get("height"))
+
+        x1 = safe_float(k1.get("x"))
+        y1 = safe_float(k1.get("y"))
+        w1 = safe_float(k1.get("width"))
+        h1 = safe_float(k1.get("height"))
+
+        for f in range(f0 + 1, f1):
+            t = (f - f0) / float(f1 - f0)
+            x = x0 + (x1 - x0) * t
+            y = y0 + (y1 - y0) * t
+            w = w0 + (w1 - w0) * t
+            h = h0 + (h1 - h0) * t
+            frames_boxes.setdefault(f, []).append((x, y, w, h))
+
+    return frames_boxes
+
+
+def extract_task_item(task_json: Any, expected_video_stem: str) -> dict:
+    """Pick the task dict for expected_video_stem out of a task JSON payload:
+    a bare dict is returned as-is; a singleton list is unwrapped; a longer
+    list is searched by filename stem, falling back to the first element.
+    NOTE(review): non-dict items in a list would raise AttributeError on
+    .get() below — assumes well-formed Label Studio exports, TODO confirm."""
+    if isinstance(task_json, dict):
+        return task_json
+    if isinstance(task_json, list):
+        if len(task_json) == 1:
+            return task_json[0]
+        for item in task_json:
+            data = item.get("data") or {}
+            stem = Path(data.get("metadata_file_filename") or data.get("video") or "").stem
+            if stem == expected_video_stem:
+                return item
+        return task_json[0]
+    raise ValueError("Unsupported task JSON structure")
+
+
+def infer_total_frames(item: dict, results: Optional[List[dict]] = None) -> int:
+    """Best-effort total frame count: ffprobe nb_frames, then any result's
+    framesCount, then round(duration * fps).  (Definition continues past
+    this review chunk.)  Returns 0 when nothing usable is present."""
+    data = item.get("data") or {}
+
+    n = int(safe_float(data.get("metadata_video_nb_frames"), 0))
+    if n > 0:
+        return n
+
+    if results:
+        for r in results:
+            value = r.get("value") or {}
+            n = int(safe_float(value.get("framesCount"), 0))
+            if n > 0:
+                return n
+
+    duration = 
safe_float(data.get("metadata_video_duration", data.get("duration", 0.0)), 0.0) + + fps = safe_float(data.get("frames_per_second"), 0.0) + if fps <= 0: + fps = parse_ffmpeg_rate(data.get("metadata_video_r_frame_rate")) + if fps <= 0: + fps = parse_ffmpeg_rate(data.get("metadata_video_avg_frame_rate")) + + if duration > 0 and fps > 0: + return int(round(duration * fps)) + + return 0 + + +def extract_latest_results( + item: dict, + result_type: str = "videorectangle", + from_name: Optional[str] = None, + to_name: Optional[str] = None, +) -> List[dict]: + annos = item.get("annotations") or [] + if not annos: + return [] + + latest_ann = max(annos, key=lambda a: parse_ts(a["updated_at"])) + out: List[dict] = [] + for r in (latest_ann.get("result") or []): + if r.get("type") != result_type: + continue + if from_name is not None and r.get("from_name") != from_name: + continue + if to_name is not None and r.get("to_name") != to_name: + continue + out.append(r) + return out + + +def extract_positive_frames( + item: dict, + result_type: str = "videorectangle", + from_name: Optional[str] = None, + to_name: Optional[str] = None, +) -> Set[int]: + results = extract_latest_results(item, result_type=result_type, from_name=from_name, to_name=to_name) + positive: Set[int] = set() + + for r in results: + value = r.get("value") or {} + seq = value.get("sequence") or [] + frame_boxes = interpolate_sequence(seq) + positive.update(frame_boxes.keys()) + + return positive + + +def stride_offset(video_stem: str, frame_stride: int, frame_offset_mode: str, frame_offset: int) -> int: + if frame_stride <= 1: + return 0 + if frame_offset_mode == "fixed": + return int(frame_offset) % frame_stride + if frame_offset_mode == "video_hash": + import zlib + return zlib.crc32(video_stem.encode("utf-8")) % frame_stride + raise ValueError(f"Invalid frame_offset_mode: {frame_offset_mode}") + + +def eligible_negative_frames( + video_stem: str, + total_frames: int, + positive_frames: Set[int], + 
    frame_stride: int,
+    frame_offset_mode: str,
+    frame_offset: int,
+) -> List[int]:
+    # (continuation of eligible_negative_frames: keep every frame on the
+    # per-video stride lattice that was never covered by a positive box.)
+    # NOTE(review): frame_stride == 0 raises ZeroDivisionError in the modulo
+    # below — stride_offset only special-cases stride <= 1.  The CLI default
+    # is 3; confirm no caller can pass 0.
+    off = stride_offset(video_stem, frame_stride, frame_offset_mode, frame_offset)
+    return [
+        f for f in range(total_frames)
+        if (f % frame_stride) == off and f not in positive_frames
+    ]
+
+
+def fetch_task_json(s3_client: Any, bucket: str, key: str) -> Any:
+    # Download and JSON-decode one Label Studio task payload from S3.
+    obj = s3_client.get_object(Bucket=bucket, Key=key)
+    return json.loads(obj["Body"].read().decode("utf-8"))
+
+
+def sample_frames(video_stem: str, eligible_frames: Sequence[int], k: int, seed: int) -> List[int]:
+    # Sample up to k frames, returned sorted.  Deterministic per video: the
+    # RNG is seeded with "seed:video_stem", so reruns pick the same frames.
+    if not eligible_frames or k <= 0:
+        return []
+    if k >= len(eligible_frames):
+        return sorted(eligible_frames)
+
+    rng = random.Random(f"{seed}:{video_stem}")
+    return sorted(rng.sample(list(eligible_frames), k))
+
+
+def create_condition_negative_shards(
+    csv_paths: Sequence[Path],
+    out_dir: Path,
+    *,
+    bucket: str = DEFAULT_BUCKET,
+    frames_per_video: int = 5,
+    frame_stride: int = 3,
+    frame_offset_mode: str = "video_hash",
+    frame_offset: int = 0,
+    shard_size: int = 100000,
+    negative_seed: int = 42,
+    result_type: str = "videorectangle",
+    from_name: Optional[str] = None,
+    to_name: Optional[str] = None,
+    aws_profile: Optional[str] = None,
+    cache_task_json_dir: Optional[Path] = None,
+) -> Dict[str, Any]:
+    """
+    End-to-end driver: select condition-balanced videos from the water
+    conditions CSVs, fetch their Label Studio tasks from S3, sample negative
+    (no-fish) frames, and write empty YOLO label shards plus manifest,
+    summary, and per-video metadata sidecar files under out_dir.
+    (Definition continues past this review chunk.)
+    """
+    out_dir.mkdir(parents=True, exist_ok=True)
+    manifest_csv = out_dir / "condition_negative_manifest.csv"
+    summary_json = out_dir / "condition_negative_summary.json"
+    metadata_csv = out_dir / "condition_negative_video_metadata.csv"
+    if cache_task_json_dir is not None:
+        cache_task_json_dir.mkdir(parents=True, exist_ok=True)
+
+    session = boto3.Session(profile_name=aws_profile) if aws_profile else boto3.Session()
+    s3_client = session.client("s3")
+
+    rows, raw_condition_columns = load_condition_rows(csv_paths)
+    # Only columns with >= 2 distinct observed values take part in balancing.
+    condition_columns = active_condition_columns(rows, raw_condition_columns)
+
+    selected_rows, targets, per_col_counts = greedy_select_balanced_rows(rows, condition_columns)
+
+    writer = 
TarShardWriter(out_dir, shard_size=shard_size, prefix="condition_negatives") + + samples: List[VideoNegativeSample] = [] + failures: List[Dict[str, str]] = [] + metadata_records: List[VideoMetadataRecord] = [] + + for row in selected_rows: + try: + task_json = fetch_task_json(s3_client, bucket, row.s3_key) + + if cache_task_json_dir is not None: + cache_task_json(task_json, cache_task_json_dir, row.s3_key) + + item = extract_task_item(task_json, row.video_stem) + + metadata_records.append( + extract_video_metadata_record( + item, + video_stem=row.video_stem, + s3_key=row.s3_key, + source_csv=row.source_csv, + ) + ) + + results = extract_latest_results( + item, + result_type=result_type, + from_name=from_name, + to_name=to_name, + ) + total_frames = infer_total_frames(item, results=results) + if total_frames <= 0: + failures.append({"video_stem": row.video_stem, "reason": "total_frames_unavailable"}) + continue + + positive = extract_positive_frames( + item, + result_type=result_type, + from_name=from_name, + to_name=to_name, + ) + eligible = eligible_negative_frames( + row.video_stem, + total_frames, + positive, + frame_stride=frame_stride, + frame_offset_mode=frame_offset_mode, + frame_offset=frame_offset, + ) + + sampled = sample_frames( + row.video_stem, + eligible, + k=frames_per_video, + seed=negative_seed, + ) + if not sampled: + failures.append({"video_stem": row.video_stem, "reason": "no_eligible_negative_frames"}) + continue + + for frame_idx in sampled: + writer.write_text(f"{row.video_stem}/frame_{frame_idx:06d}.txt", "") + + samples.append( + VideoNegativeSample( + video_stem=row.video_stem, + s3_key=row.s3_key, + sampled_frames=sampled, + total_frames=total_frames, + positive_frames=len(positive), + eligible_negative_frames=len(eligible), + conditions=row.conditions, + source_csv=row.source_csv, + ) + ) + except Exception as e: + failures.append({"video_stem": row.video_stem, "reason": repr(e)}) + + writer.close() + + with manifest_csv.open("w", 
newline="", encoding="utf-8") as f: + fieldnames = [ + "video_stem", + "s3_key", + "source_csv", + "total_frames", + "positive_frames", + "eligible_negative_frames", + "sampled_frames", + ] + condition_columns + w = csv.DictWriter(f, fieldnames=fieldnames) + w.writeheader() + for s in samples: + row = { + "video_stem": s.video_stem, + "s3_key": s.s3_key, + "source_csv": s.source_csv, + "total_frames": s.total_frames, + "positive_frames": s.positive_frames, + "eligible_negative_frames": s.eligible_negative_frames, + "sampled_frames": " ".join(str(x) for x in s.sampled_frames), + } + for col in condition_columns: + row[col] = s.conditions.get(col, "") + w.writerow(row) + + with metadata_csv.open("w", newline="", encoding="utf-8") as f: + fieldnames = [ + "video_stem", + "s3_key", + "fps", + "nb_frames", + "duration", + "width", + "height", + "org", + "site", + "device", + "source_csv", + ] + w = csv.DictWriter(f, fieldnames=fieldnames) + w.writeheader() + for rec in metadata_records: + w.writerow(asdict(rec)) + + selected_condition_counts: Dict[str, Counter] = {} + for col in condition_columns: + c = Counter() + for s in samples: + if col in s.conditions: + c[s.conditions[col]] += 1 + selected_condition_counts[col] = c + + summary = { + "bucket": bucket, + "csv_paths": [str(p) for p in csv_paths], + "condition_columns": condition_columns, + "input_rows": len(rows), + "selected_videos_before_fetch": len(selected_rows), + "written_videos": len(samples), + "written_negative_frames": sum(len(s.sampled_frames) for s in samples), + "frames_per_video": frames_per_video, + "frame_stride": frame_stride, + "frame_offset_mode": frame_offset_mode, + "frame_offset": frame_offset, + "metadata_csv": str(metadata_csv), + "cached_task_json_dir": str(cache_task_json_dir) if cache_task_json_dir is not None else "", + "metadata_records_written": len(metadata_records), + "targets_by_condition": { + col: {val: targets[(col, val)] for val in per_col_counts.get(col, {})} + for col in 
condition_columns + }, + "input_counts_by_condition": { + col: dict(per_col_counts[col]) for col in condition_columns + }, + "selected_counts_by_condition": { + col: dict(selected_condition_counts[col]) for col in condition_columns + }, + "failures": failures, + } + summary_json.write_text(json.dumps(summary, indent=2, sort_keys=True) + "\n", encoding="utf-8") + + return summary diff --git a/training/object-detection/src/object_detection/splits/cli.py b/training/object-detection/src/object_detection/splits/cli.py new file mode 100644 index 0000000..cbd00b2 --- /dev/null +++ b/training/object-detection/src/object_detection/splits/cli.py @@ -0,0 +1,129 @@ +import argparse +import csv +import json +from pathlib import Path + +from object_detection.splits.splitter import ( + build_groups, + split_groups_greedy, + write_manifest, + summarize_split, +) + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--labels-root", required=True, type=Path, + help="Root of exploded YOLO labels, e.g. 
data/99_work/yolo_annos_exploded") + ap.add_argument("--out-dir", required=True, type=Path, + help="Output directory for split manifests") + ap.add_argument("--sites", nargs="*", default=["tankeeah", "kitwanga", "bear"], + help="Sites to include (baseline)") + ap.add_argument("--seed", type=int, default=42) + ap.add_argument("--train-frac", type=float, default=0.80) + ap.add_argument("--val-frac", type=float, default=0.10) + ap.add_argument("--test-frac", type=float, default=0.10) + ap.add_argument("--limit-files", type=int, default=None, + help="Debug: limit to N random label files") + + # Objective weights + ap.add_argument("--w-class", type=float, default=4.0) + ap.add_argument("--w-tod", type=float, default=1.0) + ap.add_argument("--w-density", type=float, default=1.0) + ap.add_argument("--w-area", type=float, default=1.0) + ap.add_argument("--w-ar", type=float, default=1.0) + ap.add_argument("--w-size", type=float, default=2.0) + + args = ap.parse_args() + + if not args.labels_root.exists(): + raise SystemExit(f"labels-root not found: {args.labels_root}") + + ssum = args.train_frac + args.val_frac + args.test_frac + if abs(ssum - 1.0) > 1e-6: + raise SystemExit(f"train/val/test fractions must sum to 1.0; got {ssum}") + + groups = build_groups( + labels_root=args.labels_root, + sites_keep=args.sites, + seed=args.seed, + limit=args.limit_files, + ) + + # Split + weights = { + "class": args.w_class, + "tod": args.w_tod, + "density": args.w_density, + "area": args.w_area, + "ar": args.w_ar, + "size": args.w_size, + } + + train, val, test, report = split_groups_greedy( + groups=groups, + seed=args.seed, + train_frac=args.train_frac, + val_frac=args.val_frac, + test_frac=args.test_frac, + weights=weights, + ) + + out_dir = args.out_dir + out_dir.mkdir(parents=True, exist_ok=True) + + # Write manifests (relative paths to label files) + write_manifest(out_dir / "train.txt", train.frame_paths) + write_manifest(out_dir / "val.txt", val.frame_paths) + 
write_manifest(out_dir / "test.txt", test.frame_paths) + + # Group assignment CSV + with (out_dir / "group_assignments.csv").open("w", newline="") as f: + w = csv.writer(f) + w.writerow(["group_id", "split", "site", "device", "date", "n_frames", "n_boxes"]) + for s in [train, val, test]: + for gid in s.group_ids: + g = groups[gid] + w.writerow([gid, s.name, g.site, g.device, g.date, g.n_frames, g.n_boxes]) + + # JSON report + full_report = { + "params": { + "labels_root": str(args.labels_root), + "sites": args.sites, + "seed": args.seed, + "fractions": {"train": args.train_frac, "val": args.val_frac, "test": args.test_frac}, + "weights": weights, + "grouping": "group_id = site|device|YYYYMMDD", + "notes": [ + "Split is group-wise to reduce leakage from temporally adjacent frames.", + "Time-of-day bucket derives from video clip HHMMSS in stem; frames inherit clip bucket.", + "Balancing is soft; rare classes are prioritized earlier in greedy assignment.", + ], + }, + "targets": { + "total_frames": report["total_frames"], + "target_frames": report["target_frames"], + "actual_frames": report["actual_frames"], + "global_class_dist": report["class_dist"], + "global_tod_dist": report["tod_dist"], + "global_density_dist": report["density_dist"], + "global_area_dist": report["area_dist"], + "global_ar_dist": report["ar_dist"], + }, + "splits": { + "train": summarize_split(train), + "val": summarize_split(val), + "test": summarize_split(test), + }, + } + + (out_dir / "split_report.json").write_text(json.dumps(full_report, indent=2, sort_keys=True) + "\n") + + print("[make_splits] wrote:") + print(f" {out_dir / 'train.txt'} ({len(train.frame_paths)} frames)") + print(f" {out_dir / 'val.txt'} ({len(val.frame_paths)} frames)") + print(f" {out_dir / 'test.txt'} ({len(test.frame_paths)} frames)") + print(f" {out_dir / 'group_assignments.csv'}") + print(f" {out_dir / 'split_report.json'}") + + diff --git a/training/object-detection/src/object_detection/splits/parsing.py 
import re
from pathlib import Path
from typing import Dict, Tuple, Optional
from collections import Counter


def time_bucket(hhmmss: str) -> str:
    """Coarse time-of-day bucket derived from the HH of an HHMMSS string."""
    try:
        hh = int(hhmmss[0:2])
    except (TypeError, ValueError):
        # Non-string input or non-numeric hour prefix.
        return "unknown"
    if 0 <= hh <= 5:
        return "night"
    if 6 <= hh <= 11:
        return "morning"
    if 12 <= hh <= 17:
        return "afternoon"
    if 18 <= hh <= 23:
        return "evening"
    # hh parsed but outside 0-23: malformed timestamp.
    return "unknown"


def density_bin(n_boxes: int) -> str:
    """Bin label for the number of boxes in a frame."""
    if n_boxes <= 0:
        return "0"
    if n_boxes == 1:
        return "1"
    if n_boxes == 2:
        return "2"
    if 3 <= n_boxes <= 4:
        return "3-4"
    if 5 <= n_boxes <= 9:
        return "5-9"
    return "10+"


def ar_bin(w: float, h: float) -> str:
    """
    Aspect-ratio bin based on w/h.

    w, h are YOLO-normalized widths/heights in [0, 1]. Non-positive
    dimensions yield "invalid" rather than dividing by zero.
    """
    if w <= 0 or h <= 0:
        return "invalid"
    r = w / h

    # Thresholds are tunable; these give three roughly intuitive buckets.
    if r < 0.67:
        return "tall"    # height-dominant
    if r <= 1.5:
        return "square"  # roughly square-ish
    return "wide"        # width-dominant


def area_bin(area: float) -> str:
    """
    Bin label for a YOLO-normalized bbox area (w*h) in [0, 1].

    Thresholds are tunable; bins are half-open [lo, hi).
    """
    if area <= 0:
        return "0"
    if area < 0.0025:
        return "<0.0025"
    if area < 0.01:
        return "0.0025-0.01"
    if area < 0.04:
        return "0.01-0.04"
    if area < 0.16:
        return "0.04-0.16"
    return ">=0.16"


# Compiled once at import; matches e.g. "frame_000123.txt".
_FRAME_RE = re.compile(r"^frame_(\d+)\.txt$")


def parse_frame_idx(filename: str) -> Optional[int]:
    """Extract the integer frame index from a "frame_000123.txt" name, or None."""
    m = _FRAME_RE.match(filename)
    # int() cannot fail here: the captured group is guaranteed to be digits.
    return int(m.group(1)) if m else None


def read_yolo_label(path: Path) -> Tuple[int, Counter, Counter, Counter]:
    """
    Parse one YOLO label file (one "cls cx cy w h" row per box).

    Returns:
        n_boxes: number of parseable boxes,
        class_counts: class_id -> box count,
        area_counts: area-bin label -> box count,
        ar_counts: aspect-ratio-bin label -> box count.

    Unreadable files and empty files yield (0, Counter(), Counter(), Counter()).
    Rows with fewer than 5 fields or a non-integer class id are skipped.
    """
    n_boxes = 0
    class_counts: Counter = Counter()
    area_counts: Counter = Counter()
    ar_counts: Counter = Counter()

    try:
        txt = path.read_text().strip()
    except (OSError, UnicodeDecodeError):
        # Treat unreadable/undecodable files as empty rather than aborting a scan.
        return 0, Counter(), Counter(), Counter()

    if not txt:
        return 0, Counter(), Counter(), Counter()

    for line in txt.splitlines():
        parts = line.split()
        if len(parts) < 5:
            continue
        try:
            cls_id = int(parts[0])
        except ValueError:
            continue
        # Malformed w/h fall back to 0.0, which lands in the "0"/"invalid"
        # bins — same observable binning as treating each field separately.
        try:
            w = float(parts[3])
            h = float(parts[4])
        except ValueError:
            w = h = 0.0
        n_boxes += 1
        class_counts[cls_id] += 1
        area_counts[area_bin(w * h)] += 1
        ar_counts[ar_bin(w, h)] += 1

    return n_boxes, class_counts, area_counts, ar_counts
# -----------------------------
# Data structures
# -----------------------------


@dataclass
class FrameRecord:
    """Per-frame metadata for one labelled frame.

    NOTE(review): not constructed anywhere in this module's visible code;
    presumably consumed by other modules — confirm before removing.
    """

    rel_path: str          # label-file path relative to labels_root
    video_stem: str
    frame_idx: int
    org: str
    site: str
    device: str
    date: str              # YYYYMMDD
    tod: str               # time-of-day bucket (see parsing.time_bucket)

    n_boxes: int
    class_counts: Counter  # class_id -> box count
    density_bin: str
    area_bins: Counter     # area-bin label -> box count (one per box)


@dataclass
class GroupStats:
    """Aggregated statistics for one leakage group (group_id = site|device|date)."""

    group_id: str
    site: str
    device: str
    date: str

    n_frames: int
    n_boxes: int

    class_counts: Counter    # class_id -> box count
    tod_counts: Counter      # time-of-day bucket -> frame count
    density_counts: Counter  # density bin -> frame count
    area_counts: Counter     # area bin -> box count
    ar_counts: Counter       # aspect-ratio bin -> box count

    # Label-file rel_paths for every frame in the group (for manifest writing).
    frame_paths: List[str]


# -----------------------------
# Scanning labels
# -----------------------------


def iter_label_files(labels_root: Path) -> Iterable[Path]:
    """Yield every YOLO label file under labels_root.

    Expected layout: labels_root/<video_stem>/frame_000123.txt
    """
    for p in labels_root.rglob("frame_*.txt"):
        if p.is_file():
            yield p


def build_groups(
    labels_root: Path,
    sites_keep: List[str],
    seed: int,
    limit: Optional[int] = None,
) -> Dict[str, GroupStats]:
    """
    Scan label files and build per-group aggregates. Group id = site|device|date.

    Args:
        labels_root: root directory holding <video_stem>/frame_*.txt files.
        sites_keep: sites to include; an empty list keeps every site.
        seed: RNG seed — only affects which files survive `limit` subsampling.
        limit: debug option, keep at most N randomly chosen label files.

    Files whose video stem or frame name cannot be parsed are counted and
    skipped (reported via print at the end).
    """
    rnd = random.Random(seed)

    files = sorted(iter_label_files(labels_root))
    if limit is not None:
        rnd.shuffle(files)
        files = files[:limit]

    groups: Dict[str, GroupStats] = {}

    skipped = 0
    for f in files:
        video_stem = f.parent.name
        meta = parse_video_stem(video_stem)
        if meta is None:
            skipped += 1
            continue

        site = meta["site"]
        if sites_keep and site not in sites_keep:
            continue

        if parse_frame_idx(f.name) is None:
            skipped += 1
            continue

        n_boxes, class_counts, area_counts, ar_counts = read_yolo_label(f)
        # Label files are assumed to exist only for frames with boxes; if
        # negatives are ever included, empty files still land in the "0" bin.
        dens_bin = density_bin(n_boxes)
        tod = time_bucket(meta["time"])

        rel_path = str(f.relative_to(labels_root))

        group_id = f"{site}|{meta['device']}|{meta['date']}"
        g = groups.get(group_id)
        if g is None:
            g = groups[group_id] = GroupStats(
                group_id=group_id,
                site=site,
                device=meta["device"],
                date=meta["date"],
                n_frames=0,
                n_boxes=0,
                class_counts=Counter(),
                tod_counts=Counter(),
                density_counts=Counter(),
                area_counts=Counter(),
                ar_counts=Counter(),
                frame_paths=[],
            )

        g.n_frames += 1
        g.n_boxes += n_boxes
        g.class_counts.update(class_counts)
        g.tod_counts[tod] += 1
        g.density_counts[dens_bin] += 1
        g.area_counts.update(area_counts)
        g.ar_counts.update(ar_counts)
        g.frame_paths.append(rel_path)

    if skipped:
        print(f"[make_splits] skipped {skipped} files due to parse issues")
    print(f"[make_splits] groups={len(groups)} from label files={len(files)}")
    return groups


# -----------------------------
# Split objective
# -----------------------------


def normalize_counter(c: Counter) -> Dict[Any, float]:
    """Return the counter as a probability distribution; empty dict if total <= 0."""
    total = float(sum(c.values()))
    if total <= 0:
        return {}
    return {k: v / total for k, v in c.items()}


def l1_dist(p: Dict[Any, float], q: Dict[Any, float], keys: Iterable[Any]) -> float:
    """L1 distance between two distributions, evaluated over the given key set."""
    return sum(abs(p.get(k, 0.0) - q.get(k, 0.0)) for k in keys)


@dataclass
class SplitState:
    """Mutable accumulator for one split (train/val/test).

    Counter/list fields use default_factory — instead of `= None` defaults
    patched in __post_init__ — so the declared field types are honest and
    each instance gets fresh containers. Construction is unchanged:
    SplitState(name, target_frac).
    """

    name: str
    target_frac: float
    n_frames: int = 0

    class_counts: Counter = field(default_factory=Counter)
    tod_counts: Counter = field(default_factory=Counter)
    density_counts: Counter = field(default_factory=Counter)
    area_counts: Counter = field(default_factory=Counter)
    ar_counts: Counter = field(default_factory=Counter)

    group_ids: List[str] = field(default_factory=list)
    frame_paths: List[str] = field(default_factory=list)

    def add_group(self, g: GroupStats) -> None:
        """Fold one group's statistics (and frame paths) into this split."""
        self.n_frames += g.n_frames
        self.class_counts.update(g.class_counts)
        self.tod_counts.update(g.tod_counts)
        self.density_counts.update(g.density_counts)
        self.area_counts.update(g.area_counts)
        self.ar_counts.update(g.ar_counts)
        self.group_ids.append(g.group_id)
        self.frame_paths.extend(g.frame_paths)


def compute_global_targets(groups: List[GroupStats]) -> Dict[str, Any]:
    """Global (all-groups) distributions that each split should match."""
    total_frames = sum(g.n_frames for g in groups)

    global_class = Counter()
    global_tod = Counter()
    global_density = Counter()
    global_area = Counter()
    global_ar = Counter()

    for g in groups:
        global_class.update(g.class_counts)
        global_tod.update(g.tod_counts)
        global_density.update(g.density_counts)
        global_area.update(g.area_counts)
        global_ar.update(g.ar_counts)

    return {
        "total_frames": total_frames,
        "class_keys": sorted(global_class.keys()),
        "tod_keys": sorted(global_tod.keys()),
        "density_keys": sorted(global_density.keys()),
        "area_keys": sorted(global_area.keys()),
        "ar_keys": sorted(global_ar.keys()),
        "class_dist": normalize_counter(global_class),
        "tod_dist": normalize_counter(global_tod),
        "density_dist": normalize_counter(global_density),
        "area_dist": normalize_counter(global_area),
        "ar_dist": normalize_counter(global_ar),
    }


def rarity_score(g: GroupStats, global_class_dist: Dict[int, float]) -> float:
    """
    Priority for greedy assignment: higher score => assigned earlier.

    Inverse-frequency weighting over the classes present in the group
    (so rare classes are placed first), plus a small bonus for very
    dense groups.
    """
    s = 0.0
    for cls_id, cnt in g.class_counts.items():
        p = global_class_dist.get(cls_id, 1e-12)
        # Weight by how much of that class the group carries.
        s += cnt * (1.0 / max(p, 1e-6))
    s += 0.25 * g.n_boxes
    return s


def split_groups_greedy(
    groups: Dict[str, GroupStats],
    seed: int,
    train_frac: float,
    val_frac: float,
    test_frac: float,
    weights: Dict[str, float],
) -> Tuple[SplitState, SplitState, SplitState, Dict[str, Any]]:
    """
    Greedy group assignment minimizing distance to global distributions + size penalty.

    weights keys: class, tod, density, area, ar, size

    Returns (train, val, test, report) where report merges the global
    targets with total/target/actual frame counts.
    """
    rnd = random.Random(seed)
    group_list = list(groups.values())

    targets = compute_global_targets(group_list)
    total_frames = targets["total_frames"]

    # Shuffle first so equal-rarity groups get a seed-dependent (but
    # reproducible) order, then sort rare-first.
    rnd.shuffle(group_list)
    group_list.sort(key=lambda g: rarity_score(g, targets["class_dist"]), reverse=True)

    train = SplitState("train", train_frac)
    val = SplitState("val", val_frac)
    test = SplitState("test", test_frac)
    # Candidate order: smaller splits first, so score ties favor them.
    splits = [test, val, train]

    # Precompute per-split target frame counts.
    target_frames = {
        "train": train_frac * total_frames,
        "val": val_frac * total_frames,
        "test": test_frac * total_frames,
    }

    def score_split(after: SplitState) -> float:
        """Weighted objective for one split state; lower is better."""
        class_d = l1_dist(normalize_counter(after.class_counts), targets["class_dist"], targets["class_keys"])
        tod_d = l1_dist(normalize_counter(after.tod_counts), targets["tod_dist"], targets["tod_keys"])
        dens_d = l1_dist(normalize_counter(after.density_counts), targets["density_dist"], targets["density_keys"])
        area_d = l1_dist(normalize_counter(after.area_counts), targets["area_dist"], targets["area_keys"])
        ar_d = l1_dist(normalize_counter(after.ar_counts), targets["ar_dist"], targets["ar_keys"])

        # Size penalty: keep n_frames close to this split's target.
        tf = target_frames[after.name]
        size_d = abs(after.n_frames - tf) / max(tf, 1.0)

        return (
            weights["class"] * class_d
            + weights["tod"] * tod_d
            + weights["density"] * dens_d
            + weights["area"] * area_d
            + weights["ar"] * ar_d
            + weights["size"] * size_d
        )

    # Greedy: for each group, tentatively add it to each split and keep the
    # assignment minimizing the summed score across all splits (this keeps
    # every split moving toward its targets, not just the receiving one).
    for g in group_list:
        best = None
        best_score = float("inf")

        for s in splits:
            # Cheap clone of the score-relevant state (Counter copies).
            tmp = SplitState(s.name, s.target_frac)
            tmp.n_frames = s.n_frames
            tmp.class_counts = s.class_counts.copy()
            tmp.tod_counts = s.tod_counts.copy()
            tmp.density_counts = s.density_counts.copy()
            tmp.area_counts = s.area_counts.copy()
            # BUG FIX: ar_counts was previously not copied, so the
            # aspect-ratio term scored only the candidate group's boxes
            # instead of the accumulated split + group.
            tmp.ar_counts = s.ar_counts.copy()

            tmp.add_group(g)

            total = 0.0
            for other in splits:
                if other.name == s.name:
                    total += score_split(tmp)
                else:
                    total += score_split(other)

            if total < best_score:
                best_score = total
                best = s

        assert best is not None
        best.add_group(g)

    report = {
        "total_frames": total_frames,
        "target_frames": target_frames,
        "actual_frames": {s.name: s.n_frames for s in splits},
    }
    return train, val, test, {**targets, **report}


# -----------------------------
# Reporting + writing
# -----------------------------


def summarize_split(s: SplitState) -> Dict[str, Any]:
    """JSON-serializable summary of one split's size and distributions."""
    return {
        "n_frames": s.n_frames,
        "n_boxes": int(sum(s.class_counts.values())),
        "n_groups": len(s.group_ids),
        "class_counts": dict(s.class_counts),
        "tod_counts": dict(s.tod_counts),
        "density_counts": dict(s.density_counts),
        "area_counts": dict(s.area_counts),
        "ar_counts": dict(s.ar_counts),
    }


def write_manifest(out_path: Path, rel_paths: List[str]) -> None:
    """Write sorted rel_paths, one per line, with a trailing newline when non-empty."""
    out_path.parent.mkdir(parents=True, exist_ok=True)
    ordered = sorted(rel_paths)
    out_path.write_text("\n".join(ordered) + ("\n" if ordered else ""))
def safe_float(x: str, default: float = 0.0) -> float:
    """Convert *x* to float, returning *default* when conversion fails.

    Catches only the errors float() actually raises — ValueError for
    unparseable strings, TypeError for non-numeric objects (e.g. None) —
    instead of a blanket Exception that would also hide unrelated bugs.
    """
    try:
        return float(x)
    except (TypeError, ValueError):
        return default