From 5ce08527523f7b411846e49147826034fe5af708 Mon Sep 17 00:00:00 2001 From: KamiCreed <9517086+KamiCreed@users.noreply.github.com> Date: Mon, 23 Feb 2026 18:13:51 -0800 Subject: [PATCH 01/35] Add dvc and uv projects --- training/object-detection/.dvc/.gitignore | 3 +++ training/object-detection/.dvc/config | 0 training/object-detection/.dvcignore | 3 +++ training/object-detection/.python-version | 1 + training/object-detection/README.md | 0 training/object-detection/main.py | 6 ++++++ training/object-detection/pyproject.toml | 7 +++++++ training/object-detection/uv.lock | 8 ++++++++ 8 files changed, 28 insertions(+) create mode 100644 training/object-detection/.dvc/.gitignore create mode 100644 training/object-detection/.dvc/config create mode 100644 training/object-detection/.dvcignore create mode 100644 training/object-detection/.python-version create mode 100644 training/object-detection/README.md create mode 100644 training/object-detection/main.py create mode 100644 training/object-detection/pyproject.toml create mode 100644 training/object-detection/uv.lock diff --git a/training/object-detection/.dvc/.gitignore b/training/object-detection/.dvc/.gitignore new file mode 100644 index 0000000..528f30c --- /dev/null +++ b/training/object-detection/.dvc/.gitignore @@ -0,0 +1,3 @@ +/config.local +/tmp +/cache diff --git a/training/object-detection/.dvc/config b/training/object-detection/.dvc/config new file mode 100644 index 0000000..e69de29 diff --git a/training/object-detection/.dvcignore b/training/object-detection/.dvcignore new file mode 100644 index 0000000..5197305 --- /dev/null +++ b/training/object-detection/.dvcignore @@ -0,0 +1,3 @@ +# Add patterns of files dvc should ignore, which could improve +# the performance. 
Learn more at +# https://dvc.org/doc/user-guide/dvcignore diff --git a/training/object-detection/.python-version b/training/object-detection/.python-version new file mode 100644 index 0000000..cc1923a --- /dev/null +++ b/training/object-detection/.python-version @@ -0,0 +1 @@ +3.8 diff --git a/training/object-detection/README.md b/training/object-detection/README.md new file mode 100644 index 0000000..e69de29 diff --git a/training/object-detection/main.py b/training/object-detection/main.py new file mode 100644 index 0000000..f376d2d --- /dev/null +++ b/training/object-detection/main.py @@ -0,0 +1,6 @@ +def main(): + print("Hello from object-detection!") + + +if __name__ == "__main__": + main() diff --git a/training/object-detection/pyproject.toml b/training/object-detection/pyproject.toml new file mode 100644 index 0000000..94a0e56 --- /dev/null +++ b/training/object-detection/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "object-detection" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.8" +dependencies = [] diff --git a/training/object-detection/uv.lock b/training/object-detection/uv.lock new file mode 100644 index 0000000..7ff0862 --- /dev/null +++ b/training/object-detection/uv.lock @@ -0,0 +1,8 @@ +version = 1 +revision = 3 +requires-python = ">=3.8" + +[[package]] +name = "object-detection" +version = "0.1.0" +source = { virtual = "." 
} From 6e885483523cf4b92e8c6b3269954ae0b225a691 Mon Sep 17 00:00:00 2001 From: KamiCreed <9517086+KamiCreed@users.noreply.github.com> Date: Wed, 25 Feb 2026 22:59:59 -0800 Subject: [PATCH 02/35] Add raw JSON data from API --- training/object-detection/.gitignore | 1 + training/object-detection/01_raw.dvc | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 training/object-detection/.gitignore create mode 100644 training/object-detection/01_raw.dvc diff --git a/training/object-detection/.gitignore b/training/object-detection/.gitignore new file mode 100644 index 0000000..391d5f2 --- /dev/null +++ b/training/object-detection/.gitignore @@ -0,0 +1 @@ +/01_raw diff --git a/training/object-detection/01_raw.dvc b/training/object-detection/01_raw.dvc new file mode 100644 index 0000000..bfe7af9 --- /dev/null +++ b/training/object-detection/01_raw.dvc @@ -0,0 +1,6 @@ +outs: +- md5: 843268e6b8108d3bf563defd9acacbfd.dir + size: 382874010 + nfiles: 51 + hash: md5 + path: 01_raw From 2ece96f4e5050cf8adc2e392fcb8c4fd1750dffa Mon Sep 17 00:00:00 2001 From: KamiCreed <9517086+KamiCreed@users.noreply.github.com> Date: Wed, 25 Feb 2026 23:09:23 -0800 Subject: [PATCH 03/35] Fix filepaths of raw data --- training/object-detection/01_raw.dvc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/object-detection/01_raw.dvc b/training/object-detection/01_raw.dvc index bfe7af9..a08b222 100644 --- a/training/object-detection/01_raw.dvc +++ b/training/object-detection/01_raw.dvc @@ -1,5 +1,5 @@ outs: -- md5: 843268e6b8108d3bf563defd9acacbfd.dir +- md5: 3c7812ff2347c3cf7140ceb4e459bec2.dir size: 382874010 nfiles: 51 hash: md5 From c40ab5c28065533e0747507d16fe1883cd34ad70 Mon Sep 17 00:00:00 2001 From: KamiCreed <9517086+KamiCreed@users.noreply.github.com> Date: Wed, 25 Feb 2026 23:11:45 -0800 Subject: [PATCH 04/35] Update raw data from S3 --- training/object-detection/01_raw.dvc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/training/object-detection/01_raw.dvc b/training/object-detection/01_raw.dvc index a08b222..231a350 100644 --- a/training/object-detection/01_raw.dvc +++ b/training/object-detection/01_raw.dvc @@ -1,6 +1,6 @@ outs: -- md5: 3c7812ff2347c3cf7140ceb4e459bec2.dir - size: 382874010 - nfiles: 51 +- md5: 3c9068cdaacbf2d8934c76a2e98c945d.dir + size: 372865559 + nfiles: 50 hash: md5 path: 01_raw From b3ef3cee9c5d2912b55dbba35cd3b4bc32b6c87f Mon Sep 17 00:00:00 2001 From: KamiCreed <9517086+KamiCreed@users.noreply.github.com> Date: Wed, 25 Feb 2026 23:34:46 -0800 Subject: [PATCH 05/35] Add update JSON pipeline step --- training/object-detection/01_raw.dvc | 6 ------ training/object-detection/dvc.lock | 11 +++++++++++ training/object-detection/dvc.yaml | 12 ++++++++++++ 3 files changed, 23 insertions(+), 6 deletions(-) delete mode 100644 training/object-detection/01_raw.dvc create mode 100644 training/object-detection/dvc.lock create mode 100644 training/object-detection/dvc.yaml diff --git a/training/object-detection/01_raw.dvc b/training/object-detection/01_raw.dvc deleted file mode 100644 index 231a350..0000000 --- a/training/object-detection/01_raw.dvc +++ /dev/null @@ -1,6 +0,0 @@ -outs: -- md5: 3c9068cdaacbf2d8934c76a2e98c945d.dir - size: 372865559 - nfiles: 50 - hash: md5 - path: 01_raw diff --git a/training/object-detection/dvc.lock b/training/object-detection/dvc.lock new file mode 100644 index 0000000..38b1ae5 --- /dev/null +++ b/training/object-detection/dvc.lock @@ -0,0 +1,11 @@ +schema: '2.0' +stages: + update_raw: + cmd: rclone sync -P --filter "- /salm_dataset*/**" --filter "+ *.json" + --filter "- *.zip" aws:salmonvision-ml-datasets/rgb/model_input/ 01_raw + outs: + - path: 01_raw + hash: md5 + md5: 3c9068cdaacbf2d8934c76a2e98c945d.dir + size: 372865559 + nfiles: 50 diff --git a/training/object-detection/dvc.yaml b/training/object-detection/dvc.yaml new file mode 100644 index 0000000..32edd1d --- /dev/null +++ b/training/object-detection/dvc.yaml @@ -0,0 
+1,12 @@ +stages: + update_raw: + cmd: >- + rclone sync + -P + --filter "- /salm_dataset*/**" + --filter "+ *.json" + --filter "- *.zip" + aws:salmonvision-ml-datasets/rgb/model_input/ + 01_raw + outs: + - 01_raw From c98908e072e5fb609cfff48c4fbc7af985c2bd4d Mon Sep 17 00:00:00 2001 From: KamiCreed <9517086+KamiCreed@users.noreply.github.com> Date: Thu, 26 Feb 2026 08:29:56 -0800 Subject: [PATCH 06/35] Organize data more --- training/object-detection/.dvc/config | 4 ++++ training/object-detection/data/01_raw/.gitignore | 1 + training/object-detection/dvc.lock | 5 +++-- training/object-detection/dvc.yaml | 4 ++-- 4 files changed, 10 insertions(+), 4 deletions(-) create mode 100644 training/object-detection/data/01_raw/.gitignore diff --git a/training/object-detection/.dvc/config b/training/object-detection/.dvc/config index e69de29..8b006d6 100644 --- a/training/object-detection/.dvc/config +++ b/training/object-detection/.dvc/config @@ -0,0 +1,4 @@ +[core] + remote = storage +['remote "storage"'] + url = s3://salmonvision-dvc/rgb_object_detection diff --git a/training/object-detection/data/01_raw/.gitignore b/training/object-detection/data/01_raw/.gitignore new file mode 100644 index 0000000..9cd5243 --- /dev/null +++ b/training/object-detection/data/01_raw/.gitignore @@ -0,0 +1 @@ +/labelstudio_annos diff --git a/training/object-detection/dvc.lock b/training/object-detection/dvc.lock index 38b1ae5..dae0118 100644 --- a/training/object-detection/dvc.lock +++ b/training/object-detection/dvc.lock @@ -2,9 +2,10 @@ schema: '2.0' stages: update_raw: cmd: rclone sync -P --filter "- /salm_dataset*/**" --filter "+ *.json" - --filter "- *.zip" aws:salmonvision-ml-datasets/rgb/model_input/ 01_raw + --filter "- *.zip" aws:salmonvision-ml-datasets/rgb/model_input/ + data/01_raw/labelstudio_annos outs: - - path: 01_raw + - path: data/01_raw/labelstudio_annos hash: md5 md5: 3c9068cdaacbf2d8934c76a2e98c945d.dir size: 372865559 diff --git a/training/object-detection/dvc.yaml 
b/training/object-detection/dvc.yaml index 32edd1d..11ca0ba 100644 --- a/training/object-detection/dvc.yaml +++ b/training/object-detection/dvc.yaml @@ -7,6 +7,6 @@ stages: --filter "+ *.json" --filter "- *.zip" aws:salmonvision-ml-datasets/rgb/model_input/ - 01_raw + data/01_raw/labelstudio_annos outs: - - 01_raw + - data/01_raw/labelstudio_annos From 078bda19219fa220200eee7d1792b8f25e2bc74e Mon Sep 17 00:00:00 2001 From: KamiCreed <9517086+KamiCreed@users.noreply.github.com> Date: Thu, 26 Feb 2026 08:58:32 -0800 Subject: [PATCH 07/35] Add conditions and counts data --- training/object-detection/data/01_raw/.gitignore | 3 +++ .../data/01_raw/salmon_vid_counts.csv.dvc | 5 +++++ .../01_raw/salmon_vid_counts_summary.csv.dvc | 5 +++++ .../data/01_raw/sv_water_conditions.dvc | 6 ++++++ training/object-detection/dvc.lock | 16 ++++++++++++++++ training/object-detection/dvc.yaml | 7 +++++++ 6 files changed, 42 insertions(+) create mode 100644 training/object-detection/data/01_raw/salmon_vid_counts.csv.dvc create mode 100644 training/object-detection/data/01_raw/salmon_vid_counts_summary.csv.dvc create mode 100644 training/object-detection/data/01_raw/sv_water_conditions.dvc diff --git a/training/object-detection/data/01_raw/.gitignore b/training/object-detection/data/01_raw/.gitignore index 9cd5243..767a4b6 100644 --- a/training/object-detection/data/01_raw/.gitignore +++ b/training/object-detection/data/01_raw/.gitignore @@ -1 +1,4 @@ /labelstudio_annos +/salmon_vid_counts.csv +/salmon_vid_counts_summary.csv +/sv_water_conditions diff --git a/training/object-detection/data/01_raw/salmon_vid_counts.csv.dvc b/training/object-detection/data/01_raw/salmon_vid_counts.csv.dvc new file mode 100644 index 0000000..1023678 --- /dev/null +++ b/training/object-detection/data/01_raw/salmon_vid_counts.csv.dvc @@ -0,0 +1,5 @@ +outs: +- md5: b6136eee30a358d7473a160197bc91be + size: 6465051 + hash: md5 + path: salmon_vid_counts.csv diff --git 
a/training/object-detection/data/01_raw/salmon_vid_counts_summary.csv.dvc b/training/object-detection/data/01_raw/salmon_vid_counts_summary.csv.dvc new file mode 100644 index 0000000..4f4cbb3 --- /dev/null +++ b/training/object-detection/data/01_raw/salmon_vid_counts_summary.csv.dvc @@ -0,0 +1,5 @@ +outs: +- md5: c65c2ba8214d26905ccb9608e9071b93 + size: 1031 + hash: md5 + path: salmon_vid_counts_summary.csv diff --git a/training/object-detection/data/01_raw/sv_water_conditions.dvc b/training/object-detection/data/01_raw/sv_water_conditions.dvc new file mode 100644 index 0000000..fc1ba77 --- /dev/null +++ b/training/object-detection/data/01_raw/sv_water_conditions.dvc @@ -0,0 +1,6 @@ +outs: +- md5: 44403010d10d741a8e00e52c93be6c39.dir + size: 195622 + nfiles: 10 + hash: md5 + path: sv_water_conditions diff --git a/training/object-detection/dvc.lock b/training/object-detection/dvc.lock index dae0118..f401bfa 100644 --- a/training/object-detection/dvc.lock +++ b/training/object-detection/dvc.lock @@ -10,3 +10,19 @@ stages: md5: 3c9068cdaacbf2d8934c76a2e98c945d.dir size: 372865559 nfiles: 50 + split_data: + cmd: sleep 1 + deps: + - path: data/01_raw/salmon_vid_counts.csv + hash: md5 + md5: b6136eee30a358d7473a160197bc91be + size: 6465051 + - path: data/01_raw/salmon_vid_counts_summary.csv + hash: md5 + md5: c65c2ba8214d26905ccb9608e9071b93 + size: 1031 + - path: data/01_raw/sv_water_conditions + hash: md5 + md5: 44403010d10d741a8e00e52c93be6c39.dir + size: 195622 + nfiles: 10 diff --git a/training/object-detection/dvc.yaml b/training/object-detection/dvc.yaml index 11ca0ba..a56bbaf 100644 --- a/training/object-detection/dvc.yaml +++ b/training/object-detection/dvc.yaml @@ -10,3 +10,10 @@ stages: data/01_raw/labelstudio_annos outs: - data/01_raw/labelstudio_annos + split_data: + cmd: >- + sleep 1 + deps: + - data/01_raw/salmon_vid_counts.csv + - data/01_raw/salmon_vid_counts_summary.csv + - data/01_raw/sv_water_conditions From 8db310bc469e023dc6ed97077cba5b31baf9084e 
Mon Sep 17 00:00:00 2001 From: KamiCreed <9517086+KamiCreed@users.noreply.github.com> Date: Thu, 26 Feb 2026 12:42:28 -0800 Subject: [PATCH 08/35] Start adding yolo converter --- training/object-detection/dvc.yaml | 6 + .../scripts/yolo_converter_ls_video.py | 459 ++++++++++++++++++ 2 files changed, 465 insertions(+) create mode 100644 training/object-detection/scripts/yolo_converter_ls_video.py diff --git a/training/object-detection/dvc.yaml b/training/object-detection/dvc.yaml index a56bbaf..069744a 100644 --- a/training/object-detection/dvc.yaml +++ b/training/object-detection/dvc.yaml @@ -10,6 +10,12 @@ stages: data/01_raw/labelstudio_annos outs: - data/01_raw/labelstudio_annos + build_model_input: + cmd: >- + uv run python scripts/yolo_converter_ls_video.py + deps: + - scripts/yolo_converter_ls_video.py + - data/01_raw/labelstudio_annos split_data: cmd: >- sleep 1 diff --git a/training/object-detection/scripts/yolo_converter_ls_video.py b/training/object-detection/scripts/yolo_converter_ls_video.py new file mode 100644 index 0000000..641a0e6 --- /dev/null +++ b/training/object-detection/scripts/yolo_converter_ls_video.py @@ -0,0 +1,459 @@ +#!/usr/bin/env python3 + +import yaml # requires PyYAML: pip install pyyaml +import json +import traceback +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, Iterable, List, Optional, Tuple, Any +from collections import defaultdict +from datetime import datetime + +def load_class_map_from_yolo_yaml(yaml_path: Path) -> Dict[str, int]: + """ + Load a YOLO-style data.yaml and return a mapping: class_name -> class_id + + Expects something like: + + names: + 0: Coho + 1: Bull + 2: Rainbow + ... + + or: + + names: [Coho, Bull, Rainbow, ...] 
+ """ + data: Any = yaml.safe_load(Path(yaml_path).read_text()) + names = data.get("names") + if names is None: + raise ValueError(f"'names' not found in {yaml_path}") + + class_map: Dict[str, int] = {} + + if isinstance(names, dict): + # {0: 'Coho', 1: 'Bull', ...} (keys can be int or str) + for k, v in names.items(): + try: + idx = int(k) + except Exception: + raise ValueError(f"Invalid class index {k!r} in names of {yaml_path}") + label = str(v) + class_map[label] = idx + elif isinstance(names, (list, tuple)): + # ['Coho', 'Bull', 'Rainbow', ...] + for idx, label in enumerate(names): + class_map[str(label)] = idx + else: + raise ValueError(f"Unsupported 'names' structure in {yaml_path}: {type(names)}") + + return class_map + +def _interpolate_sequence(seq: Iterable[dict]) -> Dict[int, List[Tuple[float, float, float, float]]]: + """ + Given a Label Studio 'sequence' (list of keyframes) like: + + { + "frame": 47, "x": 0, "y": 62.8, "width": 15.9, "height": 15.1, "enabled": true + }, + ... + + Produce: frame_index -> list of (x, y, w, h) in the SAME units as input. + + Semantics: + - Every keyframe (enabled or not) produces a box at its own frame. + - If a keyframe has enabled=True, we linearly interpolate boxes for the frames + *between it and the next keyframe* (f0+1 .. f1-1). + - If a keyframe has enabled=False, we do NOT interpolate forward from it, + but we still keep its own box at that frame. + - A disabled keyframe can still be the *end* of an interpolation that started + from a previous enabled keyframe (since that interpolation uses the previous + keyframe's enabled flag). 
+ """ + # Sort keyframes by frame + kfs = sorted(seq, key=lambda k: int(_safe_float(k.get("frame"), 0))) + frames_boxes: Dict[int, List[Tuple[float, float, float, float]]] = {} + + if not kfs: + return frames_boxes + + # 1) Add all keyframes as boxes at their exact frames + for k in kfs: + f = int(_safe_float(k.get("frame"), -1)) + if f < 0: + continue + + x = _safe_float(k.get("x")) + y = _safe_float(k.get("y")) + w = _safe_float(k.get("width")) + h = _safe_float(k.get("height")) + frames_boxes.setdefault(f, []).append((x, y, w, h)) + + # 2) Interpolate between consecutive keyframes when the *start* keyframe is enabled + for i in range(len(kfs) - 1): + k0 = kfs[i] + k1 = kfs[i + 1] + + f0 = int(_safe_float(k0.get("frame"), -1)) + f1 = int(_safe_float(k1.get("frame"), -1)) + if f0 < 0 or f1 <= f0: + continue + + enabled0 = bool(k0.get("enabled", True)) + if not enabled0: + # Do not interpolate forward from a disabled keyframe + continue + + x0 = _safe_float(k0.get("x")) + y0 = _safe_float(k0.get("y")) + w0 = _safe_float(k0.get("width")) + h0 = _safe_float(k0.get("height")) + + x1 = _safe_float(k1.get("x")) + y1 = _safe_float(k1.get("y")) + w1 = _safe_float(k1.get("width")) + h1 = _safe_float(k1.get("height")) + + # Fill in strictly between endpoints; endpoints themselves are already added + for f in range(f0 + 1, f1): + t = (f - f0) / float(f1 - f0) + x = x0 + (x1 - x0) * t + y = y0 + (y1 - y0) * t + w = w0 + (w1 - w0) * t + h = h0 + (h1 - h0) * t + frames_boxes.setdefault(f, []).append((x, y, w, h)) + + return frames_boxes + +@dataclass +class ConvertStats: + videos_with_boxes: int = 0 + videos_without_boxes: int = 0 + label_files_written: int = 0 + errors: int = 0 + +def _safe_float(v: Any, default: float = 0.0) -> float: + try: + return float(v) + except Exception: + return default + +def _coord_mode(x: float, y: float, w: float, h: float) -> str: + """ + Infer coordinate mode for Label Studio: + - 'percent' : typical LS UI export (0..100) + - 'normalized': 
already 0..1 + - 'pixel' : values > 100 (needs video width/height) + """ + mx = max(x, y, w, h) + if mx <= 1.0000001: # already normalized + return "normalized" + if mx <= 100.0000001: # percent + return "percent" + return "pixel" + + +def _to_yolo( + x: float, + y: float, + w: float, + h: float, + vid_w: int, + vid_h: int, + forced_mode: Optional[str] = None, +) -> Tuple[float, float, float, float]: + """ + Convert LS-style box to YOLO (xc, yc, w, h) in [0,1]. + + :param forced_mode: One of {"percent", "normalized", "pixel", None/"auto"}. + If None or "auto", infer from values. + """ + mode = forced_mode or "auto" + if mode == "auto": + mode = _coord_mode(x, y, w, h) + + if mode == "normalized": + xc = x + w / 2.0 + yc = y + h / 2.0 + wn = w + hn = h + elif mode == "percent": + xc = (x + w / 2.0) / 100.0 + yc = (y + h / 2.0) / 100.0 + wn = w / 100.0 + hn = h / 100.0 + elif mode == "pixel": + xc = (x + w / 2.0) / float(vid_w) if vid_w else 0.0 + yc = (y + h / 2.0) / float(vid_h) if vid_h else 0.0 + wn = w / float(vid_w) if vid_w else 0.0 + hn = h / float(vid_h) if vid_h else 0.0 + else: + raise ValueError(f"Unknown coord_mode: {forced_mode!r}") + + # clamp to [0,1] + xc = min(max(xc, 0.0), 1.0) + yc = min(max(yc, 0.0), 1.0) + wn = min(max(wn, 0.0), 1.0) + hn = min(max(hn, 0.0), 1.0) + return xc, yc, wn, hn + + +class YoloConverterLSVideo: + """ + Convert the provided Label Studio 'video' export (annotations + data) to YOLO frame txts. + + Input structure: + [ + { + "data": { + "metadata_video_width": 1280, + "metadata_video_height": 720, + "video": "s3://.../GOLD-kitkiata-jetson-1_20240720_002007_M.mp4", + "metadata_file_filename": "GOLD-kitkiata-jetson-1_20240720_002007_M.mp4", + ... + }, + "annotations": [ + { + "result": [ + { + "type": "videorectangle", + "from_name": "box", + "to_name": "video", + "value": { + "labels": ["Rainbow"], + "sequence": [ + {"frame": 47, "x": 0, "y": 62.83, "width": 15.96, "height": 15.16, "enabled": true, ...}, + ... 
+ ] + } + }, + ... + ] + } + ] + }, + { + ... + }, + ... + ] + + Writes: + //frame_000047.txt # one line per box in that frame + """ + + def __init__( + self, + class_map: Dict[str, int], + output_dir: Path, + empty_list_path: Optional[Path] = None, + overwrite_video_dir: bool = False, + result_type: str = "videorectangle", + from_name: Optional[str] = None, # e.g., "box"; if None accept any + to_name: Optional[str] = None, # e.g., "video"; if None accept any + coord_mode: str = "auto", # "auto", "percent", "normalized", "pixel" + error_log_path: Optional[Path] = None, + ): + """ + :param coord_mode: + "auto" -> infer (default) + "percent" -> x/y/width/height are 0..100 + "normalized" -> x/y/width/height are 0..1 + "pixel" -> x/y/width/height are in pixels + :param error_log_path: where to append error tracebacks. + If None, defaults to /ls_to_yolo_errors.log + """ + self.class_map = class_map + self.output_dir = Path(output_dir) + self.output_dir.mkdir(parents=True, exist_ok=True) + self.empty_list_path = Path(empty_list_path) if empty_list_path else None + self.overwrite_video_dir = overwrite_video_dir + self.result_type = result_type + self.from_name = from_name + self.to_name = to_name + self.coord_mode = coord_mode + self.error_log_path = ( + Path(error_log_path) if error_log_path else (self.output_dir / "ls_to_yolo_errors.log") + ) + + # ---- public API ---- + + def convert_folder(self, json_dir: Path, pattern: str = "*.json") -> ConvertStats: + stats = ConvertStats() + for p in sorted(Path(json_dir).glob(pattern)): + try: + s = self.convert_file(p) + stats.videos_with_boxes += s.videos_with_boxes + stats.videos_without_boxes += s.videos_without_boxes + stats.label_files_written += s.label_files_written + except Exception as e: + stats.errors += 1 + self._log_error(f"convert_file({p})", e) + return stats + + def convert_file(self, json_path: Path) -> ConvertStats: + stats = ConvertStats() + json_path = Path(json_path) + + try: + items = 
json.loads(json_path.read_text()) + except Exception as e: + stats.errors += 1 + self._log_error(f"read_json({json_path})", e) + return stats + + if not isinstance(items, list): + err = ValueError(f"{json_path} must contain a top-level list") + stats.errors += 1 + self._log_error(f"validate_json({json_path})", err) + return stats + + for item in items: + try: + s = self._convert_item(item) + stats.videos_with_boxes += s.videos_with_boxes + stats.videos_without_boxes += s.videos_without_boxes + stats.label_files_written += s.label_files_written + except Exception as e: + stats.errors += 1 + item_id = item.get("id", "unknown") + self._log_error(f"_convert_item(id={item_id}, src={json_path})", e) + return stats + + # ---- internals ---- + + def _log_error(self, context: str, exc: Exception): + try: + self.error_log_path.parent.mkdir(parents=True, exist_ok=True) + with self.error_log_path.open("a") as f: + f.write(f"\n=== ERROR in {context} ===\n") + traceback.print_exception(type(exc), exc, exc.__traceback__, file=f) + except Exception: + # last-resort: don't crash because logging failed + pass + + @staticmethod + def _parse_ts(s): + return datetime.fromisoformat(s.replace("Z", "+00:00")) + + def _convert_item(self, item: dict) -> ConvertStats: + stats = ConvertStats() + + data = item.get("data") or {} + video_uri = data.get("metadata_file_filename") or data.get("video") or "unknown.mp4" + video_stem = Path(video_uri).stem + vid_w = int(_safe_float(data.get("metadata_video_width"), 0)) + vid_h = int(_safe_float(data.get("metadata_video_height"), 0)) + + annos = item.get("annotations") or [] + results = [] + if len(annos) > 0: + latest_ann = max( + annos, + key=lambda a: YoloConverterLSVideo._parse_ts(a["updated_at"]) + ) + + for r in (latest_ann.get("result") or []): + if r.get("type") != self.result_type: + continue + if self.from_name is not None and r.get("from_name") != self.from_name: + continue + if self.to_name is not None and r.get("to_name") != self.to_name: 
+ continue + results.append(r) + + vid_dir = self.output_dir / video_stem + if not results: + stats.videos_without_boxes += 1 + if self.empty_list_path: + self.empty_list_path.parent.mkdir(parents=True, exist_ok=True) + with self.empty_list_path.open("a") as f: + f.write(f"{video_uri}\n") + return stats + + if vid_dir.exists() and not self.overwrite_video_dir: + # skip existing video dir to avoid mixing runs + return stats + vid_dir.mkdir(parents=True, exist_ok=True) + + wrote_any = False + + for r in results: + value = r.get("value") or {} + labels: List[str] = value.get("labels") or [] + if not labels: + continue + cls_name = labels[0] + if cls_name not in self.class_map: + # unknown class; skip this track + continue + cls_id = self.class_map[cls_name] + + seq: Iterable[dict] = value.get("sequence") or [] + frame_boxes = _interpolate_sequence(seq) # frame -> [(x,y,w,h), ...] + + for frame_idx, boxes in frame_boxes.items(): + label_path = vid_dir / f"frame_{frame_idx:06d}.txt" + with label_path.open("a") as f: + for (x, y, w, h) in boxes: + xc, yc, wn, hn = _to_yolo( + x, y, w, h, + vid_w=vid_w, + vid_h=vid_h, + forced_mode=self.coord_mode, + ) + line = f"{cls_id} {xc:.6f} {yc:.6f} {wn:.6f} {hn:.6f}" + f.write(line + "\n") + stats.label_files_written += 1 + wrote_any = True + + if wrote_any: + stats.videos_with_boxes += 1 + else: + stats.videos_without_boxes += 1 + if self.empty_list_path: + with self.empty_list_path.open("a") as f: + f.write(f"{video_uri}\n") + + return stats + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="Convert Label Studio video JSON to YOLO frame labels") + parser.add_argument("input", help="JSON file or directory containing Label Studio JSON") + parser.add_argument("--data-yaml", required=True, help="Path to YOLO data.yaml (with 'names:' mapping)") + parser.add_argument("--out", required=True, help="Output directory") + parser.add_argument("--empty-list", default=None, help="Path to write 
videos with no boxes") + parser.add_argument("--pattern", default="*.json", help="Glob when input is a directory") + parser.add_argument("--overwrite", action="store_true", help="Overwrite existing per-video folders") + parser.add_argument("--from-name", default=None, help="Filter by result.from_name (e.g., 'box')") + parser.add_argument("--to-name", default=None, help="Filter by result.to_name (e.g., 'video')") + parser.add_argument("--coord-mode", default="percent", help='Set the coordinates mode: "auto", "percent", "normalized", "pixel"') + args = parser.parse_args() + + data_yaml_path = Path(args.data_yaml) + class_map = load_class_map_from_yolo_yaml(data_yaml_path) + + conv = YoloConverterLSVideo( + class_map=class_map, + output_dir=Path(args.out), + empty_list_path=Path(args.empty_list) if args.empty_list else None, + overwrite_video_dir=args.overwrite, + from_name=args.from_name, + to_name=args.to_name, + coord_mode=args.coord_mode, + ) + + inp = Path(args.input) + if inp.is_dir(): + s = conv.convert_folder(inp, pattern=args.pattern) + else: + s = conv.convert_file(inp) + + print( + f"Done. 
with_boxes={s.videos_with_boxes} without_boxes={s.videos_without_boxes} " + f"labels_written={s.label_files_written} errors={s.errors}" + ) From 4dd1d37544ec8fc894bc70b0ba1b23295801083b Mon Sep 17 00:00:00 2001 From: KamiCreed <9517086+KamiCreed@users.noreply.github.com> Date: Thu, 26 Feb 2026 13:43:20 -0800 Subject: [PATCH 09/35] Track raw data changes before downloading --- training/object-detection/dvc.lock | 12 +++++++++--- training/object-detection/dvc.yaml | 11 ++++++++++- .../scripts/yolo_converter_ls_video.py | 10 ++++++++++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/training/object-detection/dvc.lock b/training/object-detection/dvc.lock index f401bfa..9288feb 100644 --- a/training/object-detection/dvc.lock +++ b/training/object-detection/dvc.lock @@ -2,13 +2,19 @@ schema: '2.0' stages: update_raw: cmd: rclone sync -P --filter "- /salm_dataset*/**" --filter "+ *.json" - --filter "- *.zip" aws:salmonvision-ml-datasets/rgb/model_input/ + --filter "- *.zip" aws:salmonvision-ml-datasets/rgb/raw/ data/01_raw/labelstudio_annos + deps: + - path: s3://salmonvision-ml-datasets/rgb/raw + hash: md5 + md5: 9aab20758f57f39971a38c9a676dad27.dir + size: 720238505 + nfiles: 65 outs: - path: data/01_raw/labelstudio_annos hash: md5 - md5: 3c9068cdaacbf2d8934c76a2e98c945d.dir - size: 372865559 + md5: 188b71c31be326fe36d209cc52c23337.dir + size: 379352848 nfiles: 50 split_data: cmd: sleep 1 diff --git a/training/object-detection/dvc.yaml b/training/object-detection/dvc.yaml index 069744a..f44f02b 100644 --- a/training/object-detection/dvc.yaml +++ b/training/object-detection/dvc.yaml @@ -6,16 +6,25 @@ stages: --filter "- /salm_dataset*/**" --filter "+ *.json" --filter "- *.zip" - aws:salmonvision-ml-datasets/rgb/model_input/ + aws:salmonvision-ml-datasets/rgb/raw/ data/01_raw/labelstudio_annos + deps: + - s3://salmonvision-ml-datasets/rgb/raw outs: - data/01_raw/labelstudio_annos build_model_input: cmd: >- uv run python scripts/yolo_converter_ls_video.py + 
../salmon-computer-vision/training/annotation_webapp_alt/ + --data-yaml batch_upload/salmon_yolo.yaml + --out data/02_interim/yolo_annos + --pattern '**/*.json' + --include-sites tankeeah kitwanga bear deps: - scripts/yolo_converter_ls_video.py - data/01_raw/labelstudio_annos + outs: + - data/02_interim/yolo_annos split_data: cmd: >- sleep 1 diff --git a/training/object-detection/scripts/yolo_converter_ls_video.py b/training/object-detection/scripts/yolo_converter_ls_video.py index 641a0e6..2a4f321 100644 --- a/training/object-detection/scripts/yolo_converter_ls_video.py +++ b/training/object-detection/scripts/yolo_converter_ls_video.py @@ -254,6 +254,7 @@ def __init__( to_name: Optional[str] = None, # e.g., "video"; if None accept any coord_mode: str = "auto", # "auto", "percent", "normalized", "pixel" error_log_path: Optional[Path] = None, + include_sites: Optional[list[str]] = None, ): """ :param coord_mode: @@ -276,6 +277,7 @@ def __init__( self.error_log_path = ( Path(error_log_path) if error_log_path else (self.output_dir / "ls_to_yolo_errors.log") ) + self.include_sites = include_sites # ---- public API ---- @@ -341,6 +343,12 @@ def _convert_item(self, item: dict) -> ConvertStats: stats = ConvertStats() data = item.get("data") or {} + site = data.get("metadata_file_site_reference_string") or "" + if self.include_sites is not None: + if site not in self.include_sites: + # Not in included sites + return stats + video_uri = data.get("metadata_file_filename") or data.get("video") or "unknown.mp4" video_stem = Path(video_uri).stem vid_w = int(_safe_float(data.get("metadata_video_width"), 0)) @@ -432,6 +440,7 @@ def _convert_item(self, item: dict) -> ConvertStats: parser.add_argument("--from-name", default=None, help="Filter by result.from_name (e.g., 'box')") parser.add_argument("--to-name", default=None, help="Filter by result.to_name (e.g., 'video')") parser.add_argument("--coord-mode", default="percent", help='Set the coordinates mode: "auto", "percent", 
"normalized", "pixel"') + parser.add_argument("--include-sites", nargs="*", default=None, help='Only include videos of these sites') args = parser.parse_args() data_yaml_path = Path(args.data_yaml) @@ -445,6 +454,7 @@ def _convert_item(self, item: dict) -> ConvertStats: from_name=args.from_name, to_name=args.to_name, coord_mode=args.coord_mode, + include_sites=args.include_sites, ) inp = Path(args.input) From eef10ff717ecfc70f9399feae5e1975b2ecca068 Mon Sep 17 00:00:00 2001 From: KamiCreed <9517086+KamiCreed@users.noreply.github.com> Date: Mon, 2 Mar 2026 10:43:50 -0800 Subject: [PATCH 10/35] Add yolo conversion to pipeline --- training/object-detection/README.md | 6 ++++ .../object-detection/config/salmon_yolo.yaml | 28 +++++++++++++++++++ .../data/02_interim/.gitignore | 1 + training/object-detection/dvc.lock | 24 ++++++++++++++++ training/object-detection/dvc.yaml | 7 +++-- training/object-detection/pyproject.toml | 2 +- .../scripts/yolo_converter_ls_video.py | 22 +++++++++++---- 7 files changed, 80 insertions(+), 10 deletions(-) create mode 100644 training/object-detection/config/salmon_yolo.yaml create mode 100644 training/object-detection/data/02_interim/.gitignore mode change 100644 => 100755 training/object-detection/scripts/yolo_converter_ls_video.py diff --git a/training/object-detection/README.md b/training/object-detection/README.md index e69de29..993a2af 100644 --- a/training/object-detection/README.md +++ b/training/object-detection/README.md @@ -0,0 +1,6 @@ +# Object Detection + +Install uv +``` +curl -LsSf https://astral.sh/uv/install.sh | sh +``` diff --git a/training/object-detection/config/salmon_yolo.yaml b/training/object-detection/config/salmon_yolo.yaml new file mode 100644 index 0000000..d1601dc --- /dev/null +++ b/training/object-detection/config/salmon_yolo.yaml @@ -0,0 +1,28 @@ +# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..] 
+# Classes updated on 2026-02-12 +path: /training/export_combined_bear_kitwanga_yolo # dataset root dir +train: train.txt +val: val.txt +test: test.txt + +# Classes +names: + 0: Coho + 1: Bull + 2: Rainbow + 3: Sockeye + 4: Pink + 5: Whitefish + 6: Chinook + 7: Shiner + 8: Pikeminnow + 9: Chum + 10: Steelhead + 11: Lamprey + 12: Cutthroat + 13: Stickleback + 14: Sculpin + 15: Jack_Coho + 16: Jack_Chinook + 17: Otter + 18: Sucker diff --git a/training/object-detection/data/02_interim/.gitignore b/training/object-detection/data/02_interim/.gitignore new file mode 100644 index 0000000..2019ca8 --- /dev/null +++ b/training/object-detection/data/02_interim/.gitignore @@ -0,0 +1 @@ +/yolo_annos diff --git a/training/object-detection/dvc.lock b/training/object-detection/dvc.lock index 9288feb..306d76f 100644 --- a/training/object-detection/dvc.lock +++ b/training/object-detection/dvc.lock @@ -32,3 +32,27 @@ stages: md5: 44403010d10d741a8e00e52c93be6c39.dir size: 195622 nfiles: 10 + build_model_input: + cmd: scripts/yolo_converter_ls_video.py data/01_raw/labelstudio_annos + --data-yaml config/salmon_yolo.yaml --out data/02_interim/yolo_annos + --pattern '**/*.json' --include-sites tankeeah kitwanga bear + deps: + - path: config/salmon_yolo.yaml + hash: md5 + md5: f453f5dc54f1743eaedcc3ab117d269e + size: 547 + - path: data/01_raw/labelstudio_annos + hash: md5 + md5: 188b71c31be326fe36d209cc52c23337.dir + size: 379352848 + nfiles: 50 + - path: scripts/yolo_converter_ls_video.py + hash: md5 + md5: cca7398b0bf1053184009452ae7cdaaf + size: 16599 + outs: + - path: data/02_interim/yolo_annos + hash: md5 + md5: 58944a7c76dff71dbb220f78e59ed9d9.dir + size: 40115388 + nfiles: 782981 diff --git a/training/object-detection/dvc.yaml b/training/object-detection/dvc.yaml index f44f02b..68aecc9 100644 --- a/training/object-detection/dvc.yaml +++ b/training/object-detection/dvc.yaml @@ -14,15 +14,16 @@ stages: - data/01_raw/labelstudio_annos build_model_input: cmd: >- - uv run python 
scripts/yolo_converter_ls_video.py - ../salmon-computer-vision/training/annotation_webapp_alt/ - --data-yaml batch_upload/salmon_yolo.yaml + scripts/yolo_converter_ls_video.py + data/01_raw/labelstudio_annos + --data-yaml config/salmon_yolo.yaml --out data/02_interim/yolo_annos --pattern '**/*.json' --include-sites tankeeah kitwanga bear deps: - scripts/yolo_converter_ls_video.py - data/01_raw/labelstudio_annos + - config/salmon_yolo.yaml outs: - data/02_interim/yolo_annos split_data: diff --git a/training/object-detection/pyproject.toml b/training/object-detection/pyproject.toml index 94a0e56..2e3f06f 100644 --- a/training/object-detection/pyproject.toml +++ b/training/object-detection/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "object-detection" version = "0.1.0" -description = "Add your description here" +description = "SalmonVision object detector training" readme = "README.md" requires-python = ">=3.8" dependencies = [] diff --git a/training/object-detection/scripts/yolo_converter_ls_video.py b/training/object-detection/scripts/yolo_converter_ls_video.py old mode 100644 new mode 100755 index 2a4f321..7e22435 --- a/training/object-detection/scripts/yolo_converter_ls_video.py +++ b/training/object-detection/scripts/yolo_converter_ls_video.py @@ -1,6 +1,14 @@ -#!/usr/bin/env python3 - -import yaml # requires PyYAML: pip install pyyaml +#!/usr/bin/env -S uv run --script +# /// script +# requires-python = ">=3.8" +# dependencies = [ +# "pyyaml>=6.0.3", +# ] +# [tool.uv] +# exclude-newer = "2026-03-02T18:41:13Z" +# /// + +import yaml import json import traceback from dataclasses import dataclass @@ -254,7 +262,7 @@ def __init__( to_name: Optional[str] = None, # e.g., "video"; if None accept any coord_mode: str = "auto", # "auto", "percent", "normalized", "pixel" error_log_path: Optional[Path] = None, - include_sites: Optional[list[str]] = None, + include_sites: List[str] = [], ): """ :param coord_mode: @@ -344,7 +352,7 @@ def _convert_item(self, item: dict) -> 
ConvertStats: data = item.get("data") or {} site = data.get("metadata_file_site_reference_string") or "" - if self.include_sites is not None: + if len(self.include_sites) > 0: if site not in self.include_sites: # Not in included sites return stats @@ -440,7 +448,7 @@ def _convert_item(self, item: dict) -> ConvertStats: parser.add_argument("--from-name", default=None, help="Filter by result.from_name (e.g., 'box')") parser.add_argument("--to-name", default=None, help="Filter by result.to_name (e.g., 'video')") parser.add_argument("--coord-mode", default="percent", help='Set the coordinates mode: "auto", "percent", "normalized", "pixel"') - parser.add_argument("--include-sites", nargs="*", default=None, help='Only include videos of these sites') + parser.add_argument("--include-sites", nargs="*", default=[], help='Only include videos of these sites') args = parser.parse_args() data_yaml_path = Path(args.data_yaml) @@ -459,6 +467,8 @@ def _convert_item(self, item: dict) -> ConvertStats: inp = Path(args.input) if inp.is_dir(): + print(f"Converting labels in folder {inp}") + s = conv.convert_folder(inp, pattern=args.pattern) else: s = conv.convert_file(inp) From 22e1504cb0971521f3eba59b2e38198b0b4fb7ae Mon Sep 17 00:00:00 2001 From: KamiCreed <9517086+KamiCreed@users.noreply.github.com> Date: Mon, 2 Mar 2026 12:36:00 -0800 Subject: [PATCH 11/35] Shard dataset for better uploading --- training/object-detection/README.md | 14 ++- training/object-detection/dvc.lock | 13 +- training/object-detection/dvc.yaml | 2 + .../scripts/yolo_converter_ls_video.py | 113 +++++++++++++----- 4 files changed, 107 insertions(+), 35 deletions(-) diff --git a/training/object-detection/README.md b/training/object-detection/README.md index 993a2af..df2bacf 100644 --- a/training/object-detection/README.md +++ b/training/object-detection/README.md @@ -1,6 +1,18 @@ # Object Detection Install uv -``` +```bash curl -LsSf https://astral.sh/uv/install.sh | sh ``` + +Check dvc.yaml for the full 
pipeline. + +Run the following to run specific stages of the pipeline: +```bash +dvc repro stage_name +``` + +For example, building the model input annotations: +```bash +dvc repro build_model_input +``` diff --git a/training/object-detection/dvc.lock b/training/object-detection/dvc.lock index 306d76f..80c86f7 100644 --- a/training/object-detection/dvc.lock +++ b/training/object-detection/dvc.lock @@ -35,7 +35,8 @@ stages: build_model_input: cmd: scripts/yolo_converter_ls_video.py data/01_raw/labelstudio_annos --data-yaml config/salmon_yolo.yaml --out data/02_interim/yolo_annos - --pattern '**/*.json' --include-sites tankeeah kitwanga bear + --out-shards data/02_interim/yolo_annos --shard-size 100000 --pattern + '**/*.json' --include-sites tankeeah kitwanga bear deps: - path: config/salmon_yolo.yaml hash: md5 @@ -48,11 +49,11 @@ stages: nfiles: 50 - path: scripts/yolo_converter_ls_video.py hash: md5 - md5: cca7398b0bf1053184009452ae7cdaaf - size: 16599 + md5: 36a7224d1eea0b6e300d421a7907c411 + size: 18877 outs: - path: data/02_interim/yolo_annos hash: md5 - md5: 58944a7c76dff71dbb220f78e59ed9d9.dir - size: 40115388 - nfiles: 782981 + md5: 978efd3dca7ac11abcb153db010c39ae.dir + size: 880998400 + nfiles: 9 diff --git a/training/object-detection/dvc.yaml b/training/object-detection/dvc.yaml index 68aecc9..abf5983 100644 --- a/training/object-detection/dvc.yaml +++ b/training/object-detection/dvc.yaml @@ -18,6 +18,8 @@ stages: data/01_raw/labelstudio_annos --data-yaml config/salmon_yolo.yaml --out data/02_interim/yolo_annos + --out-shards data/02_interim/yolo_annos + --shard-size 100000 --pattern '**/*.json' --include-sites tankeeah kitwanga bear deps: diff --git a/training/object-detection/scripts/yolo_converter_ls_video.py b/training/object-detection/scripts/yolo_converter_ls_video.py index 7e22435..4f021ca 100755 --- a/training/object-detection/scripts/yolo_converter_ls_video.py +++ b/training/object-detection/scripts/yolo_converter_ls_video.py @@ -16,6 +16,47 @@ 
class TarShardWriter:
    """Write many small text files into a sequence of uncompressed TAR shards.

    Instead of materializing hundreds of thousands of tiny label files on
    disk (slow for DVC to hash and upload), entries are appended to
    ``<prefix>-NNNNNN.tar`` archives under *out_dir*, rotating to a new
    shard once *shard_size* entries have been written.
    """

    def __init__(self, out_dir: Path, shard_size: int = 10000, prefix: str = "yolo_annos"):
        """
        :param out_dir: Directory that receives the ``.tar`` shards
            (created if missing).
        :param shard_size: Maximum number of entries per shard. Clamped to
            at least 1; a value of 0 or less would otherwise trigger a
            rotation before every single write, producing one shard per
            entry plus a leaked empty leading shard.
        :param prefix: File-name prefix of each shard.
        """
        self.out_dir = Path(out_dir)
        self.out_dir.mkdir(parents=True, exist_ok=True)
        self.shard_size = max(1, int(shard_size))
        self.prefix = prefix

        self._shard_idx = 0
        self._n_in_shard = 0
        self._tar: Optional[tarfile.TarFile] = None  # currently open shard

        self._open_new()

    def _open_new(self):
        """Close the current shard (if any) and open the next one."""
        if self._tar is not None:
            self._tar.close()
        shard_name = f"{self.prefix}-{self._shard_idx:06d}.tar"
        self._tar_path = self.out_dir / shard_name
        self._tar = tarfile.open(self._tar_path, mode="w")  # uncompressed tar
        self._n_in_shard = 0
        self._shard_idx += 1

    def write_text(self, rel_path: str, text: str):
        """Add *text* (UTF-8 encoded) as archive member *rel_path*.

        Rotates to a fresh shard first when the current one is full.

        :raises ValueError: if the writer has already been closed.
        """
        if self._tar is None:
            # Fail loudly instead of AttributeError on a closed writer.
            raise ValueError("TarShardWriter is closed")

        # rotate shard if needed
        if self._n_in_shard >= self.shard_size:
            self._open_new()

        data = text.encode("utf-8")
        ti = tarfile.TarInfo(name=rel_path)
        ti.size = len(data)
        self._tar.addfile(ti, io.BytesIO(data))

        self._n_in_shard += 1

    def close(self):
        """Flush and close the current shard; idempotent."""
        if self._tar is not None:
            self._tar.close()
            self._tar = None

    # Context-manager support so callers cannot forget to close().
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()
        return False
results.append(r) - vid_dir = self.output_dir / video_stem - if not results: - stats.videos_without_boxes += 1 - if self.empty_list_path: - self.empty_list_path.parent.mkdir(parents=True, exist_ok=True) - with self.empty_list_path.open("a") as f: - f.write(f"{video_uri}\n") - return stats - - if vid_dir.exists() and not self.overwrite_video_dir: - # skip existing video dir to avoid mixing runs - return stats - vid_dir.mkdir(parents=True, exist_ok=True) - wrote_any = False + # Collect lines per frame + frame_lines: Dict[int, List[str]] = defaultdict(list) for r in results: value = r.get("value") or {} labels: List[str] = value.get("labels") or [] @@ -410,19 +444,16 @@ def _convert_item(self, item: dict) -> ConvertStats: frame_boxes = _interpolate_sequence(seq) # frame -> [(x,y,w,h), ...] for frame_idx, boxes in frame_boxes.items(): - label_path = vid_dir / f"frame_{frame_idx:06d}.txt" - with label_path.open("a") as f: - for (x, y, w, h) in boxes: - xc, yc, wn, hn = _to_yolo( - x, y, w, h, - vid_w=vid_w, - vid_h=vid_h, - forced_mode=self.coord_mode, - ) - line = f"{cls_id} {xc:.6f} {yc:.6f} {wn:.6f} {hn:.6f}" - f.write(line + "\n") - stats.label_files_written += 1 - wrote_any = True + for (x, y, w, h) in boxes: + xc, yc, wn, hn = _to_yolo( + x, y, w, h, + vid_w=vid_w, + vid_h=vid_h, + forced_mode=self.coord_mode, + ) + frame_lines[frame_idx].append(f"{cls_id} {xc:.6f} {yc:.6f} {wn:.6f} {hn:.6f}") + stats.label_files_written += 1 + wrote_any = True if wrote_any: stats.videos_with_boxes += 1 @@ -432,6 +463,25 @@ def _convert_item(self, item: dict) -> ConvertStats: with self.empty_list_path.open("a") as f: f.write(f"{video_uri}\n") + if self._sharder: + # write into shards: /frame_000123.txt + for frame_idx, lines in frame_lines.items(): + rel_path = f"{video_stem}/frame_{frame_idx:06d}.txt" + self._sharder.write_text(rel_path, "\n".join(lines) + "\n") + else: + # current behavior: write to filesystem + vid_dir = self.output_dir / video_stem + + if vid_dir.exists() and 
not self.overwrite_video_dir: + # skip existing video dir to avoid mixing runs + return stats + vid_dir.mkdir(parents=True, exist_ok=True) + + for frame_idx, lines in frame_lines.items(): + label_path = vid_dir / f"frame_{frame_idx:06d}.txt" + label_path.parent.mkdir(parents=True, exist_ok=True) + label_path.write_text("\n".join(lines) + "\n") + return stats @@ -449,6 +499,8 @@ def _convert_item(self, item: dict) -> ConvertStats: parser.add_argument("--to-name", default=None, help="Filter by result.to_name (e.g., 'video')") parser.add_argument("--coord-mode", default="percent", help='Set the coordinates mode: "auto", "percent", "normalized", "pixel"') parser.add_argument("--include-sites", nargs="*", default=[], help='Only include videos of these sites') + parser.add_argument("--out-shards", default=None, help="Directory to write TAR shards (instead of many files)") + parser.add_argument("--shard-size", type=int, default=10000, help="Number of frame label files per shard") args = parser.parse_args() data_yaml_path = Path(args.data_yaml) @@ -463,16 +515,21 @@ def _convert_item(self, item: dict) -> ConvertStats: to_name=args.to_name, coord_mode=args.coord_mode, include_sites=args.include_sites, + shard_dir=Path(args.out_shards) if args.out_shards else None, + shard_size=args.shard_size, ) inp = Path(args.input) if inp.is_dir(): - print(f"Converting labels in folder {inp}") + print(f"Converting labels from {inp}") s = conv.convert_folder(inp, pattern=args.pattern) else: s = conv.convert_file(inp) + if getattr(conv, "_sharder", None): + conv._sharder.close() + print( f"Done. 
with_boxes={s.videos_with_boxes} without_boxes={s.videos_without_boxes} " f"labels_written={s.label_files_written} errors={s.errors}" From 57caa377a55ef651b49c871cb0f3c99e838c07d9 Mon Sep 17 00:00:00 2001 From: KamiCreed <9517086+KamiCreed@users.noreply.github.com> Date: Mon, 2 Mar 2026 13:20:46 -0800 Subject: [PATCH 12/35] Skip every 3rd frame for each video --- training/object-detection/README.md | 9 +- training/object-detection/dvc.lock | 7 +- training/object-detection/dvc.yaml | 2 + .../scripts/yolo_converter_ls_video.py | 182 ++++++++++-------- 4 files changed, 119 insertions(+), 81 deletions(-) diff --git a/training/object-detection/README.md b/training/object-detection/README.md index df2bacf..f5d6d7a 100644 --- a/training/object-detection/README.md +++ b/training/object-detection/README.md @@ -1,10 +1,17 @@ # Object Detection -Install uv +Training pipeline to train the SalmonVision object detection model. + +Install uv: ```bash curl -LsSf https://astral.sh/uv/install.sh | sh ``` +Install DVC: +```bash +uv tool install dvc +``` + Check dvc.yaml for the full pipeline. 
Run the following to run specific stages of the pipeline: diff --git a/training/object-detection/dvc.lock b/training/object-detection/dvc.lock index 80c86f7..eb8328e 100644 --- a/training/object-detection/dvc.lock +++ b/training/object-detection/dvc.lock @@ -36,7 +36,8 @@ stages: cmd: scripts/yolo_converter_ls_video.py data/01_raw/labelstudio_annos --data-yaml config/salmon_yolo.yaml --out data/02_interim/yolo_annos --out-shards data/02_interim/yolo_annos --shard-size 100000 --pattern - '**/*.json' --include-sites tankeeah kitwanga bear + '**/*.json' --include-sites tankeeah kitwanga bear --frame-stride 3 + --frame-offset-mode video_hash deps: - path: config/salmon_yolo.yaml hash: md5 @@ -49,8 +50,8 @@ stages: nfiles: 50 - path: scripts/yolo_converter_ls_video.py hash: md5 - md5: 36a7224d1eea0b6e300d421a7907c411 - size: 18877 + md5: c076aef67ddd95232ad60c90a0acb3d4 + size: 20426 outs: - path: data/02_interim/yolo_annos hash: md5 diff --git a/training/object-detection/dvc.yaml b/training/object-detection/dvc.yaml index abf5983..a6981e1 100644 --- a/training/object-detection/dvc.yaml +++ b/training/object-detection/dvc.yaml @@ -22,6 +22,8 @@ stages: --shard-size 100000 --pattern '**/*.json' --include-sites tankeeah kitwanga bear + --frame-stride 3 + --frame-offset-mode video_hash deps: - scripts/yolo_converter_ls_video.py - data/01_raw/labelstudio_annos diff --git a/training/object-detection/scripts/yolo_converter_ls_video.py b/training/object-detection/scripts/yolo_converter_ls_video.py index 4f021ca..bacff7a 100755 --- a/training/object-detection/scripts/yolo_converter_ls_video.py +++ b/training/object-detection/scripts/yolo_converter_ls_video.py @@ -18,6 +18,7 @@ from datetime import datetime import io import tarfile +import zlib class TarShardWriter: def __init__(self, out_dir: Path, shard_size: int = 10000, prefix: str = "yolo_annos"): @@ -99,82 +100,6 @@ def load_class_map_from_yolo_yaml(yaml_path: Path) -> Dict[str, int]: return class_map -def 
_interpolate_sequence(seq: Iterable[dict]) -> Dict[int, List[Tuple[float, float, float, float]]]: - """ - Given a Label Studio 'sequence' (list of keyframes) like: - - { - "frame": 47, "x": 0, "y": 62.8, "width": 15.9, "height": 15.1, "enabled": true - }, - ... - - Produce: frame_index -> list of (x, y, w, h) in the SAME units as input. - - Semantics: - - Every keyframe (enabled or not) produces a box at its own frame. - - If a keyframe has enabled=True, we linearly interpolate boxes for the frames - *between it and the next keyframe* (f0+1 .. f1-1). - - If a keyframe has enabled=False, we do NOT interpolate forward from it, - but we still keep its own box at that frame. - - A disabled keyframe can still be the *end* of an interpolation that started - from a previous enabled keyframe (since that interpolation uses the previous - keyframe's enabled flag). - """ - # Sort keyframes by frame - kfs = sorted(seq, key=lambda k: int(_safe_float(k.get("frame"), 0))) - frames_boxes: Dict[int, List[Tuple[float, float, float, float]]] = {} - - if not kfs: - return frames_boxes - - # 1) Add all keyframes as boxes at their exact frames - for k in kfs: - f = int(_safe_float(k.get("frame"), -1)) - if f < 0: - continue - - x = _safe_float(k.get("x")) - y = _safe_float(k.get("y")) - w = _safe_float(k.get("width")) - h = _safe_float(k.get("height")) - frames_boxes.setdefault(f, []).append((x, y, w, h)) - - # 2) Interpolate between consecutive keyframes when the *start* keyframe is enabled - for i in range(len(kfs) - 1): - k0 = kfs[i] - k1 = kfs[i + 1] - - f0 = int(_safe_float(k0.get("frame"), -1)) - f1 = int(_safe_float(k1.get("frame"), -1)) - if f0 < 0 or f1 <= f0: - continue - - enabled0 = bool(k0.get("enabled", True)) - if not enabled0: - # Do not interpolate forward from a disabled keyframe - continue - - x0 = _safe_float(k0.get("x")) - y0 = _safe_float(k0.get("y")) - w0 = _safe_float(k0.get("width")) - h0 = _safe_float(k0.get("height")) - - x1 = _safe_float(k1.get("x")) - y1 = 
_safe_float(k1.get("y")) - w1 = _safe_float(k1.get("width")) - h1 = _safe_float(k1.get("height")) - - # Fill in strictly between endpoints; endpoints themselves are already added - for f in range(f0 + 1, f1): - t = (f - f0) / float(f1 - f0) - x = x0 + (x1 - x0) * t - y = y0 + (y1 - y0) * t - w = w0 + (w1 - w0) * t - h = h0 + (h1 - h0) * t - frames_boxes.setdefault(f, []).append((x, y, w, h)) - - return frames_boxes - @dataclass class ConvertStats: videos_with_boxes: int = 0 @@ -306,6 +231,9 @@ def __init__( include_sites: List[str] = [], shard_dir: Optional[Path] = None, shard_size: int = 10000, + frame_stride: int = 1, + frame_offset_mode: str = "fixed", + frame_offset: int = 0, ): """ :param coord_mode: @@ -332,6 +260,9 @@ def __init__( self.shard_dir = Path(shard_dir) if shard_dir else None self.shard_size = int(shard_size) self._sharder = TarShardWriter(self.shard_dir, shard_size=self.shard_size) if self.shard_dir else None + self.frame_stride = max(1, int(frame_stride)) + self.frame_offset_mode = frame_offset_mode + self.frame_offset = int(frame_offset) # ---- public API ---- @@ -379,6 +310,94 @@ def convert_file(self, json_path: Path) -> ConvertStats: # ---- internals ---- + def _stride_offset(self, video_stem: str) -> int: + if self.frame_stride <= 1: + return 0 + if self.frame_offset_mode == "fixed": + return int(self.frame_offset) % self.frame_stride + if self.frame_offset_mode == "video_hash": + # deterministic across runs + platforms + return zlib.crc32(video_stem.encode("utf-8")) % self.frame_stride + + raise ValueError("Invalid frame offset mode") + + @staticmethod + def _interpolate_sequence(seq: Iterable[dict]) -> Dict[int, List[Tuple[float, float, float, float]]]: + """ + Given a Label Studio 'sequence' (list of keyframes) like: + + { + "frame": 47, "x": 0, "y": 62.8, "width": 15.9, "height": 15.1, "enabled": true + }, + ... + + Produce: frame_index -> list of (x, y, w, h) in the SAME units as input. 
+ + Semantics: + - Every keyframe (enabled or not) produces a box at its own frame. + - If a keyframe has enabled=True, we linearly interpolate boxes for the frames + *between it and the next keyframe* (f0+1 .. f1-1). + - If a keyframe has enabled=False, we do NOT interpolate forward from it, + but we still keep its own box at that frame. + - A disabled keyframe can still be the *end* of an interpolation that started + from a previous enabled keyframe (since that interpolation uses the previous + keyframe's enabled flag). + """ + # Sort keyframes by frame + kfs = sorted(seq, key=lambda k: int(_safe_float(k.get("frame"), 0))) + frames_boxes: Dict[int, List[Tuple[float, float, float, float]]] = {} + + if not kfs: + return frames_boxes + + # 1) Add all keyframes as boxes at their exact frames + for k in kfs: + f = int(_safe_float(k.get("frame"), -1)) + if f < 0: + continue + + x = _safe_float(k.get("x")) + y = _safe_float(k.get("y")) + w = _safe_float(k.get("width")) + h = _safe_float(k.get("height")) + frames_boxes.setdefault(f, []).append((x, y, w, h)) + + # 2) Interpolate between consecutive keyframes when the *start* keyframe is enabled + for i in range(len(kfs) - 1): + k0 = kfs[i] + k1 = kfs[i + 1] + + f0 = int(_safe_float(k0.get("frame"), -1)) + f1 = int(_safe_float(k1.get("frame"), -1)) + if f0 < 0 or f1 <= f0: + continue + + enabled0 = bool(k0.get("enabled", True)) + if not enabled0: + # Do not interpolate forward from a disabled keyframe + continue + + x0 = _safe_float(k0.get("x")) + y0 = _safe_float(k0.get("y")) + w0 = _safe_float(k0.get("width")) + h0 = _safe_float(k0.get("height")) + + x1 = _safe_float(k1.get("x")) + y1 = _safe_float(k1.get("y")) + w1 = _safe_float(k1.get("width")) + h1 = _safe_float(k1.get("height")) + + # Fill in strictly between endpoints; endpoints themselves are already added + for f in range(f0 + 1, f1): + t = (f - f0) / float(f1 - f0) + x = x0 + (x1 - x0) * t + y = y0 + (y1 - y0) * t + w = w0 + (w1 - w0) * t + h = h0 + (h1 - h0) * t 
+ frames_boxes.setdefault(f, []).append((x, y, w, h)) + + return frames_boxes + def _log_error(self, context: str, exc: Exception): try: self.error_log_path.parent.mkdir(parents=True, exist_ok=True) @@ -441,7 +460,7 @@ def _convert_item(self, item: dict) -> ConvertStats: cls_id = self.class_map[cls_name] seq: Iterable[dict] = value.get("sequence") or [] - frame_boxes = _interpolate_sequence(seq) # frame -> [(x,y,w,h), ...] + frame_boxes = self._interpolate_sequence(seq) # frame -> [(x,y,w,h), ...] for frame_idx, boxes in frame_boxes.items(): for (x, y, w, h) in boxes: @@ -501,6 +520,12 @@ def _convert_item(self, item: dict) -> ConvertStats: parser.add_argument("--include-sites", nargs="*", default=[], help='Only include videos of these sites') parser.add_argument("--out-shards", default=None, help="Directory to write TAR shards (instead of many files)") parser.add_argument("--shard-size", type=int, default=10000, help="Number of frame label files per shard") + parser.add_argument("--frame-stride", type=int, default=1, + help="Keep every Nth frame (1 keeps all)") + parser.add_argument("--frame-offset-mode", choices=["fixed", "video_hash"], default="video_hash", + help="How to choose offset within stride") + parser.add_argument("--frame-offset", type=int, default=0, + help="Offset for fixed mode (0..stride-1)") args = parser.parse_args() data_yaml_path = Path(args.data_yaml) @@ -517,6 +542,9 @@ def _convert_item(self, item: dict) -> ConvertStats: include_sites=args.include_sites, shard_dir=Path(args.out_shards) if args.out_shards else None, shard_size=args.shard_size, + frame_stride=args.frame_stride, + frame_offset_mode=args.frame_offset_mode, + frame_offset=args.frame_offset, ) inp = Path(args.input) From 4cc372adcb4f3441b2272b79081ac9c9ec62cf6c Mon Sep 17 00:00:00 2001 From: KamiCreed <9517086+KamiCreed@users.noreply.github.com> Date: Mon, 2 Mar 2026 15:30:04 -0800 Subject: [PATCH 13/35] Add unpacking step --- training/object-detection/.gitignore | 1 - 
.../data/02_interim/.gitignore | 1 + training/object-detection/dvc.lock | 9 +++++ training/object-detection/dvc.yaml | 9 +++++ .../object-detection/scripts/unpack_annos.sh | 33 +++++++++++++++++++ 5 files changed, 52 insertions(+), 1 deletion(-) create mode 100755 training/object-detection/scripts/unpack_annos.sh diff --git a/training/object-detection/.gitignore b/training/object-detection/.gitignore index 391d5f2..e69de29 100644 --- a/training/object-detection/.gitignore +++ b/training/object-detection/.gitignore @@ -1 +0,0 @@ -/01_raw diff --git a/training/object-detection/data/02_interim/.gitignore b/training/object-detection/data/02_interim/.gitignore index 2019ca8..8d21cd8 100644 --- a/training/object-detection/data/02_interim/.gitignore +++ b/training/object-detection/data/02_interim/.gitignore @@ -1 +1,2 @@ /yolo_annos +/yolo_annos_unpacked diff --git a/training/object-detection/dvc.lock b/training/object-detection/dvc.lock index eb8328e..517e0bf 100644 --- a/training/object-detection/dvc.lock +++ b/training/object-detection/dvc.lock @@ -58,3 +58,12 @@ stages: md5: 978efd3dca7ac11abcb153db010c39ae.dir size: 880998400 nfiles: 9 + unpack_annos: + cmd: scripts/unpack_annos.sh data/02_interim/yolo_annos + data/02_interim/yolo_annos_unpacked + deps: + - path: data/02_interim/yolo_annos + hash: md5 + md5: 978efd3dca7ac11abcb153db010c39ae.dir + size: 880998400 + nfiles: 9 diff --git a/training/object-detection/dvc.yaml b/training/object-detection/dvc.yaml index a6981e1..8dfd154 100644 --- a/training/object-detection/dvc.yaml +++ b/training/object-detection/dvc.yaml @@ -12,6 +12,7 @@ stages: - s3://salmonvision-ml-datasets/rgb/raw outs: - data/01_raw/labelstudio_annos + frozen: true build_model_input: cmd: >- scripts/yolo_converter_ls_video.py @@ -30,6 +31,13 @@ stages: - config/salmon_yolo.yaml outs: - data/02_interim/yolo_annos + unpack_annos: + cmd: >- + scripts/unpack_annos.sh + data/02_interim/yolo_annos + data/02_interim/yolo_annos_unpacked + deps: + - 
#!/usr/bin/env bash
# unpack_annos.sh -- extract every TAR shard from in_folder into out_folder.
#
# Usage: unpack_annos.sh [-h] in_folder out_folder
set -euo pipefail

help_msg() {
    echo "$0 [-h] in_folder out_folder"
}

# Get the options
while getopts ":h" option; do
    case $option in
        h) # display Help
            help_msg
            exit;;
        \?) # Invalid option
            echo "Error: Invalid option"
            help_msg
            exit 1;;
    esac
done
# Drop the parsed options so positional args line up with $1/$2.
shift $((OPTIND - 1))

# Check if exactly two arguments are given
if [ $# -ne 2 ]; then
    help_msg
    exit 1
fi

in_path="$1"
out_path="$2"

mkdir -p "$out_path"

# nullglob so a shard-less directory yields an empty array instead of the
# literal pattern '*.tar' being handed to tar.
shopt -s nullglob
shards=("$in_path"/*.tar)
if [ ${#shards[@]} -eq 0 ]; then
    echo "Error: no .tar shards found in $in_path" >&2
    exit 1
fi

for f in "${shards[@]}"; do
    tar -xf "$f" -C "$out_path"
done
'**/*.json' --include-sites tankeeah kitwanga bear --frame-stride 3 - --frame-offset-mode video_hash + cmd: rm -r data/02_interim/yolo_annos || scripts/yolo_converter_ls_video.py + data/01_raw/labelstudio_annos --data-yaml config/salmon_yolo.yaml --out + data/02_interim/yolo_annos --out-shards data/02_interim/yolo_annos + --shard-size 100000 --pattern '**/*.json' --include-sites tankeeah + kitwanga bear --frame-stride 3 --frame-offset-mode video_hash deps: - path: config/salmon_yolo.yaml hash: md5 @@ -50,20 +50,20 @@ stages: nfiles: 50 - path: scripts/yolo_converter_ls_video.py hash: md5 - md5: c076aef67ddd95232ad60c90a0acb3d4 - size: 20426 + md5: ae7fb35f80b387294b652e733c3ba500 + size: 20699 outs: - path: data/02_interim/yolo_annos hash: md5 - md5: 978efd3dca7ac11abcb153db010c39ae.dir - size: 880998400 - nfiles: 9 + md5: 473a8eedf5fd4cc5d5b8b6d1f80681e7.dir + size: 293713920 + nfiles: 3 unpack_annos: - cmd: scripts/unpack_annos.sh data/02_interim/yolo_annos - data/02_interim/yolo_annos_unpacked + cmd: rm -r data/02_interim/yolo_annos_unpacked || scripts/unpack_annos.sh + data/02_interim/yolo_annos data/02_interim/yolo_annos_unpacked deps: - path: data/02_interim/yolo_annos hash: md5 - md5: 978efd3dca7ac11abcb153db010c39ae.dir - size: 880998400 - nfiles: 9 + md5: 473a8eedf5fd4cc5d5b8b6d1f80681e7.dir + size: 293713920 + nfiles: 3 diff --git a/training/object-detection/dvc.yaml b/training/object-detection/dvc.yaml index 8dfd154..02b6a10 100644 --- a/training/object-detection/dvc.yaml +++ b/training/object-detection/dvc.yaml @@ -33,6 +33,7 @@ stages: - data/02_interim/yolo_annos unpack_annos: cmd: >- + rm -r data/02_interim/yolo_annos_unpacked || scripts/unpack_annos.sh data/02_interim/yolo_annos data/02_interim/yolo_annos_unpacked diff --git a/training/object-detection/scripts/make_splits.py b/training/object-detection/scripts/make_splits.py new file mode 100644 index 0000000..4293706 --- /dev/null +++ b/training/object-detection/scripts/make_splits.py @@ -0,0 
+1,656 @@ +#!/usr/bin/env python3 +""" +make_splits.py + +Group-wise stratified-ish split for unpacked YOLO label files. + +- Input: unpacked labels directory that looks like: + //frame_000123.txt + + where video_stem looks like: + ORG-site-device-id_YYYYMMDD_HHMMSS_M + +- Output: + out_dir/train.txt + out_dir/val.txt + out_dir/test.txt + out_dir/group_assignments.csv + out_dir/split_report.json + +Split unit (to prevent leakage): group_id = site + device + date(YYYYMMDD) +Balancing objectives (soft): class counts, time-of-day, density bins, box area bins. +""" + +from __future__ import annotations + +import argparse +import csv +import json +import math +import os +import random +import re +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, List, Tuple, Optional, Iterable, Any +from collections import defaultdict, Counter + +# ----------------------------- +# Parsing helpers +# ----------------------------- + +_STEM_RE = re.compile( + r""" + ^ + (?P.+?) # ORG-site-device-id (up to first underscore) + _ + (?P\d{8}) # YYYYMMDD + _ + (?P