diff --git a/tools/auto_labeling_3d/README.md b/tools/auto_labeling_3d/README.md
index 55d6ae1f4..0d41bf1e9 100644
--- a/tools/auto_labeling_3d/README.md
+++ b/tools/auto_labeling_3d/README.md
@@ -81,7 +81,17 @@ docker run -it --gpus '"device=0"' --name auto_labeling_3d --shm-size=64g -d -v
 
 ## 2. Prepare Dataset
 
-Prepare your non-annotated T4dataset in the following structure:
+### From a T4dataset ID List
+
+If you have `webauto` installed locally and a list of T4dataset IDs (UUIDs), you can download and prepare the data automatically. Create a text file with one ID per line (see `tools/auto_labeling_3d/scripts/id_list_example.txt`), then run:
+```
+bash tools/auto_labeling_3d/scripts/download_id_list_from_webauto.sh [--flatten=true|false] [--delete-old-structure=true|false] ID_FILE OUTPUT_DIR [PROJECT_ID := x2_dev] [MAX_JOBS := 5]
+```
+You won't usually need to, but you can also flatten/unflatten a directory of non-annotated T4datasets manually using `flatten_webauto_artifacts.sh` and `unflatten_webauto_artifacts.sh` (unflattening only works if the original structure directory was kept, i.e. the download was run with `--delete-old-structure=false`).
+
+### Manually
+
+You can also prepare your non-annotated T4dataset manually in the following structure (note there must not be a `version_id` folder, like `0/` or `1/`):
 
 ```
 - data/t4dataset/
diff --git a/tools/auto_labeling_3d/scripts/download_id_list_from_webauto.sh b/tools/auto_labeling_3d/scripts/download_id_list_from_webauto.sh
new file mode 100644
index 000000000..27a3cca2c
--- /dev/null
+++ b/tools/auto_labeling_3d/scripts/download_id_list_from_webauto.sh
@@ -0,0 +1,318 @@
+#!/usr/bin/env bash
+#
+# Download non-annotated T4datasets from WebAuto for every ID listed in a text
+# file (several in parallel) and optionally flatten each into OUTPUT_DIR/<id>/.
+# Requires the `webauto` CLI on PATH. Run with --help for usage.
+set -euo pipefail
+
+# Default options
+FLATTEN=true
+DELETE_OLD_STRUCTURE=true
+
+# Print the one-line usage summary to stderr.
+usage() {
+  echo "Usage: $0 [--flatten=true|false] [--delete-old-structure=true|false] ID_FILE OUTPUT_DIR [PROJECT_ID := x2_dev] [MAX_JOBS := 5]" >&2
+}
+
+# --- Option parsing (only --flatten=... --delete-old-structure=...) ---
+
+POSITIONAL=()
+while [ "$#" -gt 0 ]; do
+  case "$1" in
+    --flatten=*)
+      val="${1#*=}"
+      case "$val" in
+        true|TRUE|1|yes|y) FLATTEN=true ;;
+        false|FALSE|0|no|n) FLATTEN=false ;;
+        *)
+          echo "Invalid value for --flatten: $val (use true/false)" >&2
+          exit 1
+          ;;
+      esac
+      shift
+      ;;
+    --delete-old-structure=*)
+      val="${1#*=}"
+      case "$val" in
+        true|TRUE|1|yes|y) DELETE_OLD_STRUCTURE=true ;;
+        false|FALSE|0|no|n) DELETE_OLD_STRUCTURE=false ;;
+        *)
+          echo "Invalid value for --delete-old-structure: $val (use true/false)" >&2
+          exit 1
+          ;;
+      esac
+      shift
+      ;;
+    --help|-h)
+      usage
+      echo "  ID_FILE    : Text file with one annotation-dataset-id per line" >&2
+      echo "  OUTPUT_DIR : Destination directory for downloaded assets" >&2
+      echo "  PROJECT_ID : (optional) WebAuto project-id, default: x2_dev" >&2
+      echo "  MAX_JOBS   : (optional) parallel downloads, default: 5" >&2
+      echo "Options:" >&2
+      echo "  --flatten=true|false              : Flatten artifact structure after download (default true)" >&2
+      echo "  --delete-old-structure=true|false : Remove original '0/' dir after flattening (default true)" >&2
+      exit 0
+      ;;
+    *)
+      POSITIONAL+=("$1")
+      shift
+      ;;
+  esac
+done
+
+# ${arr[@]+...} keeps `set -u` from aborting on an empty array in bash < 4.4.
+set -- ${POSITIONAL[@]+"${POSITIONAL[@]}"}
+
+if [ "$#" -lt 2 ] || [ "$#" -gt 4 ]; then
+  usage
+  exit 1
+fi
+
+ID_FILE="$1"
+OUTPUT_DIR="$2"
+PROJECT_ID="${3:-x2_dev}"
+MAX_JOBS="${4:-5}"
+TYPE="non_annotated_dataset"
+
+# MAX_JOBS is used in an integer comparison below; reject anything else early.
+if ! [[ "$MAX_JOBS" =~ ^[0-9]+$ ]] || [ "$MAX_JOBS" -lt 1 ]; then
+  echo "MAX_JOBS must be a positive integer, got: $MAX_JOBS" >&2
+  exit 1
+fi
+
+if [ ! -f "$ID_FILE" ]; then
+  echo "ID file not found: $ID_FILE" >&2
+  exit 1
+fi
+
+mkdir -p "$OUTPUT_DIR"
+
+# Read IDs into an array (ignore empty lines and comments; drop CRs from CRLF files)
+mapfile -t IDS < <(tr -d '\r' < "$ID_FILE" | grep -Ev '^\s*($|#)' || true)
+TOTAL=${#IDS[@]}
+
+if [ "$TOTAL" -eq 0 ]; then
+  echo "No IDs found in $ID_FILE" >&2
+  exit 1
+fi
+
+echo "ID file      : $ID_FILE"
+echo "Output dir   : $OUTPUT_DIR"
+echo "Project ID   : $PROJECT_ID"
+echo "Type         : $TYPE"
+echo "Parallel jobs: $MAX_JOBS"
+echo "Total IDs    : $TOTAL"
+echo "Flatten      : $FLATTEN"
+echo "Delete 0/    : $DELETE_OLD_STRUCTURE"
+echo
+
+# DONE_FILE tracks completion status and is removed on exit. LOG_FILE is kept on
+# purpose: its path is printed at the end so the user can still read it.
+DONE_FILE="$(mktemp)"
+LOG_FILE="$(mktemp)"
+trap 'rm -f "$DONE_FILE"' EXIT
+
+# Download one dataset and record the outcome.
+# Arguments: $1 - annotation dataset ID
+# Globals:   PROJECT_ID, TYPE, OUTPUT_DIR (read); DONE_FILE, LOG_FILE (appended)
+# A failed pull is recorded as "FAIL <id>" in DONE_FILE rather than aborting.
+download_one() {
+  local id="$1"
+  local status="OK"
+
+  echo "[START] $id" >> "$LOG_FILE"
+
+  if ! webauto data annotation-dataset pull-intermediate-artifact \
+    --project-id "$PROJECT_ID" \
+    --annotation-dataset-id "$id" \
+    --type "$TYPE" \
+    --asset-dir "$OUTPUT_DIR"; then
+    status="FAIL"
+  fi
+
+  echo "$status $id" >> "$DONE_FILE"
+  echo "[DONE] $id -> $status" >> "$LOG_FILE"
+}
+
+# Redraw a progress bar once per second until every ID has a line in DONE_FILE.
+# Started as a background job by the download phase below.
+print_progress() {
+  local completed percent width filled empty bar pad
+  width=40
+
+  while :; do
+    if [ -f "$DONE_FILE" ]; then
+      completed=$(wc -l < "$DONE_FILE" 2>/dev/null || echo 0)
+    else
+      completed=0
+    fi
+
+    if [ "$TOTAL" -le 0 ]; then
+      percent=0
+    else
+      percent=$(( completed * 100 / TOTAL ))
+    fi
+
+    filled=$(( percent * width / 100 ))
+    empty=$(( width - filled ))
+
+    # Pad with printf: a width of 0 yields an empty string, whereas the
+    # seq-based form printed one stray character and made the bar 41 wide.
+    printf -v bar '%*s' "$filled" ''
+    printf -v pad '%*s' "$empty" ''
+    bar="${bar// /#}$pad"
+
+    printf "\r[%s] %3d%% (%d/%d) completed" "$bar" "$percent" "$completed" "$TOTAL"
+
+    if [ "$completed" -ge "$TOTAL" ]; then
+      break
+    fi
+
+    sleep 1
+  done
+
+  echo
+}
+
+# Flatten a single dataset directory: OUTPUT_DIR/<id>
+# Moves annotation/, data/ and status.json from the nested WebAuto artifact
+# directory up to the dataset root, then deletes or renames the old '0/' tree
+# depending on DELETE_OLD_STRUCTURE. Existing items at the root are never overwritten.
+flatten_one() {
+  local root_dir="$1"
+
+  if [ ! -d "$root_dir" ]; then
+    echo " -> '$root_dir' not found, skipping."
+    return
+  fi
+
+  # Detect structure dir: prefer 0/, but also handle already-renamed WEBAUTO_STRUCTURE_0
+  local structure_dir=""
+  if [ -d "$root_dir/0" ]; then
+    structure_dir="$root_dir/0"
+  elif [ -d "$root_dir/WEBAUTO_STRUCTURE_0" ]; then
+    structure_dir="$root_dir/WEBAUTO_STRUCTURE_0"
+  else
+    echo " -> No '0' or 'WEBAUTO_STRUCTURE_0' directory, skipping flatten."
+    return
+  fi
+
+  local base="$structure_dir/intermediate_artifacts/non_annotated_dataset"
+  if [ ! -d "$base" ]; then
+    echo " -> '$base' not found, skipping flatten."
+    return
+  fi
+
+  shopt -s nullglob
+  local candidates=( "$base"/* )
+  shopt -u nullglob
+
+  if [ "${#candidates[@]}" -eq 0 ]; then
+    echo " -> No artifact directory under '$base', skipping flatten."
+    return
+  fi
+
+  local artifact_dir="${candidates[0]}"
+  echo " -> Using artifact dir: $artifact_dir"
+
+  # Move annotation/ to root_dir if present
+  if [ -d "$artifact_dir/annotation" ]; then
+    if [ -e "$root_dir/annotation" ]; then
+      echo " -> 'annotation/' already exists at root, leaving as-is."
+    else
+      echo " -> Moving annotation/ to $root_dir/"
+      mv "$artifact_dir/annotation" "$root_dir/"
+    fi
+  else
+    echo " -> No annotation/ found in artifact dir."
+  fi
+
+  # Move data/ to root_dir if present
+  if [ -d "$artifact_dir/data" ]; then
+    if [ -e "$root_dir/data" ]; then
+      echo " -> 'data/' already exists at root, leaving as-is."
+    else
+      echo " -> Moving data/ to $root_dir/"
+      mv "$artifact_dir/data" "$root_dir/"
+    fi
+  else
+    echo " -> No data/ found in artifact dir."
+  fi
+
+  # Move status.json to root_dir if present
+  if [ -f "$artifact_dir/status.json" ]; then
+    if [ -e "$root_dir/status.json" ]; then
+      echo " -> 'status.json' already exists at root, leaving as-is."
+    else
+      echo " -> Moving status.json to $root_dir/"
+      mv "$artifact_dir/status.json" "$root_dir/"
+    fi
+  else
+    echo " -> No status.json found in artifact dir."
+  fi
+
+  # After flattening, either delete the old 0/ dir or rename it (for unflattening)
+  if [ -d "$root_dir/0" ]; then
+    if [ "$DELETE_OLD_STRUCTURE" = true ]; then
+      echo " -> Removing obsolete '0/' structure directory."
+      rm -rf -- "$root_dir/0"
+    else
+      if [ -e "$root_dir/WEBAUTO_STRUCTURE_0" ]; then
+        echo " -> 'WEBAUTO_STRUCTURE_0' already exists, not renaming '0/'."
+      else
+        echo " -> Renaming '0/' to 'WEBAUTO_STRUCTURE_0/'."
+        mv "$root_dir/0" "$root_dir/WEBAUTO_STRUCTURE_0"
+      fi
+    fi
+  else
+    echo " -> No '0/' directory present after flatten (already removed or renamed)."
+  fi
+}
+
+# --- Start downloads ---
+
+print_progress &
+PROGRESS_PID=$!
+
+for id in "${IDS[@]}"; do
+  # Count only download jobs: the progress printer is itself a running background
+  # job, and counting it would make MAX_JOBS=1 wait here forever.
+  while [ "$(jobs -rp | grep -cvx -- "$PROGRESS_PID" || true)" -ge "$MAX_JOBS" ]; do
+    sleep 0.5
+  done
+  download_one "$id" &
+done
+
+wait
+wait "$PROGRESS_PID" 2>/dev/null || true
+
+echo
+echo "=== Download results ==="
+sort "$DONE_FILE"
+echo
+
+FAILED=$(grep -c '^FAIL ' "$DONE_FILE" || true)
+
+# --- Flatten phase (optional) ---
+
+if [ "$FLATTEN" = true ]; then
+  echo "Flattening downloaded datasets..."
+  for id in "${IDS[@]}"; do
+    echo "Processing $id ..."
+    flatten_one "$OUTPUT_DIR/$id"
+    echo
+  done
+  echo "Flattening done."
+else
+  echo "Flattening disabled (--flatten=false)."
+fi
+
+echo
+echo "Detailed log at: $LOG_FILE"
+
+# Surface download failures to callers instead of always exiting 0.
+if [ "$FAILED" -gt 0 ]; then
+  echo "$FAILED of $TOTAL download(s) failed." >&2
+  exit 1
+fi
diff --git a/tools/auto_labeling_3d/scripts/flatten_webauto_artifacts.sh b/tools/auto_labeling_3d/scripts/flatten_webauto_artifacts.sh
new file mode 100644
index 000000000..552eb32f9
--- /dev/null
+++ b/tools/auto_labeling_3d/scripts/flatten_webauto_artifacts.sh
@@ -0,0 +1,127 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+if [ $# -ne 1 ]; then
+  echo "Usage: $0 ROOT_DIR" >&2
+  exit 1
+fi
+
+# Strip trailing slash if present
+ROOT_DIR="${1%/}"
+
+if [ ! -d "$ROOT_DIR" ]; then
+  echo "Root directory '$ROOT_DIR' not found or not a directory." >&2
+  exit 1
+fi
+
+echo "Flattening WebAuto artifacts under: $ROOT_DIR"
+echo
+
+# Iterate over all immediate subdirectories in ROOT_DIR
+shopt -s nullglob
+subdirs=( "$ROOT_DIR"/* )
+shopt -u nullglob
+
+if [ ${#subdirs[@]} -eq 0 ]; then
+  echo "No subdirectories found under '$ROOT_DIR'. Nothing to do."
+  exit 0
+fi
+
+for root_dir in "${subdirs[@]}"; do
+  # Only process directories
+  if [ ! -d "$root_dir" ]; then
+    continue
+  fi
+
+  name="$(basename "$root_dir")"
+  echo "Processing $name ..."
+
+  # Detect structure dir: prefer 0/, but also handle already-renamed WEBAUTO_STRUCTURE_0
+  structure_dir=""
+  if [ -d "$root_dir/0" ]; then
+    structure_dir="$root_dir/0"
+  elif [ -d "$root_dir/WEBAUTO_STRUCTURE_0" ]; then
+    structure_dir="$root_dir/WEBAUTO_STRUCTURE_0"
+  else
+    echo " -> No '0' or 'WEBAUTO_STRUCTURE_0' directory, skipping."
+    echo
+    continue
+  fi
+
+  # Base path where webauto puts intermediate artifacts
+  base="$structure_dir/intermediate_artifacts/non_annotated_dataset"
+  if [ ! -d "$base" ]; then
+    echo " -> '$base' not found, skipping."
+    echo
+    continue
+  fi
+
+  # Find the inner artifact directory (e.g. DB_J6Gen2_...)
+  shopt -s nullglob
+  candidates=( "$base"/* )
+  shopt -u nullglob
+
+  if [ "${#candidates[@]}" -eq 0 ]; then
+    echo " -> No artifact directory under '$base', skipping."
+    echo
+    continue
+  fi
+
+  # Use the first candidate (assuming only one artifact dir)
+  artifact_dir="${candidates[0]}"
+  echo " -> Using artifact dir: $artifact_dir"
+
+  # Move annotation directory if present and not already at root_dir
+  if [ -d "$artifact_dir/annotation" ]; then
+    if [ -e "$root_dir/annotation" ]; then
+      echo " -> 'annotation/' already exists at root, leaving as-is."
+    else
+      echo " -> Moving annotation/ to $root_dir/"
+      mv "$artifact_dir/annotation" "$root_dir/"
+    fi
+  else
+    echo " -> No annotation/ found in artifact dir."
+  fi
+
+  # Move data directory if present and not already at root_dir
+  if [ -d "$artifact_dir/data" ]; then
+    if [ -e "$root_dir/data" ]; then
+      echo " -> 'data/' already exists at root, leaving as-is."
+    else
+      echo " -> Moving data/ to $root_dir/"
+      mv "$artifact_dir/data" "$root_dir/"
+    fi
+  else
+    echo " -> No data/ found in artifact dir."
+  fi
+
+  # Move status.json if present and not already at root
+  if [ -f "$artifact_dir/status.json" ]; then
+    if [ -e "$root_dir/status.json" ]; then
+      echo " -> 'status.json' already exists at root, leaving as-is."
+    else
+      echo " -> Moving status.json to $root_dir/"
+      mv "$artifact_dir/status.json" "$root_dir/"
+    fi
+  else
+    echo " -> No status.json found in artifact dir."
+  fi
+
+  # After flattening, rename 0/ → WEBAUTO_STRUCTURE_0 (if applicable)
+  if [ -d "$root_dir/0" ]; then
+    if [ -e "$root_dir/WEBAUTO_STRUCTURE_0" ]; then
+      echo " -> 'WEBAUTO_STRUCTURE_0' already exists, not renaming '0/'."
+    else
+      echo " -> Renaming '0/' to 'WEBAUTO_STRUCTURE_0/'."
+      mv "$root_dir/0" "$root_dir/WEBAUTO_STRUCTURE_0"
+    fi
+  else
+    # If we got here via WEBAUTO_STRUCTURE_0, nothing to rename
+    echo " -> Structure dir is already 'WEBAUTO_STRUCTURE_0', leaving as-is."
+  fi
+
+  echo " -> Done with $name"
+  echo
+done
+
+echo "All done."
diff --git a/tools/auto_labeling_3d/scripts/id_list_example.txt b/tools/auto_labeling_3d/scripts/id_list_example.txt
new file mode 100644
index 000000000..0a6a3f71b
--- /dev/null
+++ b/tools/auto_labeling_3d/scripts/id_list_example.txt
@@ -0,0 +1,2 @@
+772cb8c8-a289-4e70-bae2-fc088b6ad279
+72179920-57dd-409c-9d0c-78de2eb2ae60
diff --git a/tools/auto_labeling_3d/scripts/unflatten_webauto_artifacts.sh b/tools/auto_labeling_3d/scripts/unflatten_webauto_artifacts.sh
new file mode 100644
index 000000000..cad7322fb
--- /dev/null
+++ b/tools/auto_labeling_3d/scripts/unflatten_webauto_artifacts.sh
@@ -0,0 +1,136 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+if [ $# -ne 1 ]; then
+  echo "Usage: $0 ROOT_DIR" >&2
+  exit 1
+fi
+
+# Strip trailing slash if present
+ROOT_DIR="${1%/}"
+
+if [ ! -d "$ROOT_DIR" ]; then
+  echo "Root directory '$ROOT_DIR' not found or not a directory." >&2
+  exit 1
+fi
+
+echo "Unflattening WebAuto artifacts under: $ROOT_DIR"
+echo
+
+# Iterate over all immediate subdirectories in ROOT_DIR
+shopt -s nullglob
+subdirs=( "$ROOT_DIR"/* )
+shopt -u nullglob
+
+if [ ${#subdirs[@]} -eq 0 ]; then
+  echo "No subdirectories found under '$ROOT_DIR'. Nothing to do."
+  exit 0
+fi
+
+for root_dir in "${subdirs[@]}"; do
+  # Only process directories
+  if [ ! -d "$root_dir" ]; then
+    continue
+  fi
+
+  name="$(basename "$root_dir")"
+  echo "Processing $name ..."
+
+  ann_src="$root_dir/annotation"
+  data_src="$root_dir/data"
+  status_src="$root_dir/status.json"
+
+  # If none of the flattened items exist, nothing to do for this dir
+  if [ ! -d "$ann_src" ] && [ ! -d "$data_src" ] && [ ! -f "$status_src" ]; then
+    echo " -> No annotation/, data/, or status.json at root, skipping."
+    echo
+    continue
+  fi
+
+  # Detect structure dir: prefer WEBAUTO_STRUCTURE_0, but also support plain 0
+  structure_dir=""
+  used_placeholder=false
+  if [ -d "$root_dir/WEBAUTO_STRUCTURE_0" ]; then
+    structure_dir="$root_dir/WEBAUTO_STRUCTURE_0"
+    used_placeholder=true
+  elif [ -d "$root_dir/0" ]; then
+    structure_dir="$root_dir/0"
+    used_placeholder=false
+  else
+    echo " -> No 'WEBAUTO_STRUCTURE_0' or '0' directory found (maybe deleted during flatten). Skipping dataset."
+    echo
+    continue
+  fi
+
+  base="$structure_dir/intermediate_artifacts/non_annotated_dataset"
+  if [ ! -d "$base" ]; then
+    echo " -> '$base' not found, cannot restore structure. Skipping."
+    echo
+    continue
+  fi
+
+  # Find the inner artifact directory (e.g. DB_J6Gen2_...)
+  shopt -s nullglob
+  candidates=( "$base"/* )
+  shopt -u nullglob
+
+  if [ "${#candidates[@]}" -eq 0 ]; then
+    echo " -> No artifact directory under '$base', cannot restore. Skipping."
+    echo
+    continue
+  fi
+
+  artifact_dir="${candidates[0]}"
+  echo " -> Using artifact dir: $artifact_dir"
+
+  # Move annotation back if present at root and not already in artifact_dir
+  if [ -d "$ann_src" ]; then
+    if [ -e "$artifact_dir/annotation" ]; then
+      echo " -> 'annotation/' already exists in artifact dir, leaving both as-is."
+    else
+      echo " -> Moving annotation/ back into artifact dir."
+      mv "$ann_src" "$artifact_dir/"
+    fi
+  else
+    echo " -> No annotation/ at root."
+  fi
+
+  # Move data back if present at root and not already in artifact_dir
+  if [ -d "$data_src" ]; then
+    if [ -e "$artifact_dir/data" ]; then
+      echo " -> 'data/' already exists in artifact dir, leaving both as-is."
+    else
+      echo " -> Moving data/ back into artifact dir."
+      mv "$data_src" "$artifact_dir/"
+    fi
+  else
+    echo " -> No data/ at root."
+  fi
+
+  # Move status.json back if present at root and not already in artifact_dir
+  if [ -f "$status_src" ]; then
+    if [ -e "$artifact_dir/status.json" ]; then
+      echo " -> 'status.json' already exists in artifact dir, leaving both as-is."
+    else
+      echo " -> Moving status.json back into artifact dir."
+      mv "$status_src" "$artifact_dir/"
+    fi
+  else
+    echo " -> No status.json at root."
+  fi
+
+  # If we used the placeholder dir, try to rename it back to 0
+  if [ "$used_placeholder" = true ]; then
+    if [ -e "$root_dir/0" ]; then
+      echo " -> '0/' already exists, not renaming 'WEBAUTO_STRUCTURE_0/'."
+    else
+      echo " -> Renaming 'WEBAUTO_STRUCTURE_0/' back to '0/'."
+      mv "$root_dir/WEBAUTO_STRUCTURE_0" "$root_dir/0"
+    fi
+  fi
+
+  echo " -> Done with $name"
+  echo
+done
+
+echo "All done."