diff --git a/.github/workflows/script-collectors-e2e.yml b/.github/workflows/script-collectors-e2e.yml
new file mode 100644
index 0000000..cd432a5
--- /dev/null
+++ b/.github/workflows/script-collectors-e2e.yml
@@ -0,0 +1,253 @@
+name: Script Collectors E2E  # end-to-end run of the script collectors against a locally built stub server
+
+on:  # triggers: push / pull_request limited to the paths below, plus manual dispatch
+  push:
+    paths:
+      - "scripts/**"
+      - ".github/workflows/script-collectors-e2e.yml"  # changes to this workflow file also trigger a run
+  pull_request:
+    paths:
+      - "scripts/**"
+      - ".github/workflows/script-collectors-e2e.yml"
+  workflow_dispatch:  # no inputs defined; allows starting the workflow manually
+
+jobs:
+  linux-collectors:  # runs the bash, Python and Perl collectors against a local stub server, then verifies the uploads
+    name: Linux scripts (bash/python/perl)
+    runs-on: ubuntu-latest
+    env:  # job-level variables, visible to every step below
+      SAMPLE_URL: https://github.com/TwoSevenOneT/EDR-Freeze/releases/download/v1.0-fbd43cf/EDRFreeze-msvc.exe
+      STUB_PORT: "18080"  # port passed to the stub server and to each collector; quoted to keep it a string
+    steps:
+      - uses: actions/checkout@v4  # this repository; provides ./scripts used by the steps below
+
+      - name: Setup Go  # Go toolchain is needed to build the stub server
+        uses: actions/setup-go@v5
+        with:
+          go-version: stable
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+
+      - name: Install Perl dependency  # libwww-perl; presumably provides LWP for the Perl collector — confirm
+        run: sudo apt-get update && sudo apt-get install -y libwww-perl
+
+      - name: Checkout thunderstorm-stub-server  # second checkout into ./thunderstorm-stub-server; no ref is pinned
+        uses: actions/checkout@v4
+        with:
+          repository: Nextron-Labs/thunderstorm-stub-server
+          path: thunderstorm-stub-server
+
+      - name: Build thunderstorm-stub-server  # binary is written to $RUNNER_TEMP/thunderstorm-stub-server
+        working-directory: thunderstorm-stub-server
+        run: go build -o "$RUNNER_TEMP/thunderstorm-stub-server" .
+
+      - name: Prepare test sample  # downloads the sample; exports SAMPLE_DIR and EXPECTED_SHA256 (hash of the downloaded file) via $GITHUB_ENV
+        run: |
+          set -euo pipefail
+          SAMPLE_DIR="$RUNNER_TEMP/script-collector-e2e-sample"
+          mkdir -p "$SAMPLE_DIR"
+          curl -L --fail "$SAMPLE_URL" -o "$SAMPLE_DIR/EDRFreeze-msvc.exe"
+          EXPECTED_SHA256="$(sha256sum "$SAMPLE_DIR/EDRFreeze-msvc.exe" | awk '{print $1}')"
+          echo "SAMPLE_DIR=$SAMPLE_DIR" >> "$GITHUB_ENV"
+          echo "EXPECTED_SHA256=$EXPECTED_SHA256" >> "$GITHUB_ENV"
+
+      - name: Start thunderstorm-stub-server  # launches the server in the background, saves its PID, then polls /api/status up to 60 times (1 s apart)
+        run: |
+          set -euo pipefail
+          UPLOADS_DIR="$RUNNER_TEMP/stub-uploads"
+          LOG_FILE="$RUNNER_TEMP/stub-audit.jsonl"
+          STUB_LOG="$RUNNER_TEMP/stub-server.log"
+          mkdir -p "$UPLOADS_DIR"
+          "$RUNNER_TEMP/thunderstorm-stub-server" \
+            --port "$STUB_PORT" \
+            --uploads-dir "$UPLOADS_DIR" \
+            --log-file "$LOG_FILE" \
+            >"$STUB_LOG" 2>&1 &
+          echo $! > "$RUNNER_TEMP/stub-server.pid"
+          echo "UPLOADS_DIR=$UPLOADS_DIR" >> "$GITHUB_ENV"
+          echo "STUB_LOG=$STUB_LOG" >> "$GITHUB_ENV"
+          for i in $(seq 1 60); do
+            if curl -fsS "http://127.0.0.1:$STUB_PORT/api/status" >/dev/null; then
+              exit 0
+            fi
+            sleep 1
+          done
+          echo "Stub server did not become ready in time" >&2
+          exit 1
+
+      - name: Run bash collector  # scans $SAMPLE_DIR and submits to the stub server with source tag linux-sh-e2e
+        run: |
+          bash ./scripts/thunderstorm-collector.sh \
+            --server 127.0.0.1 \
+            --port "$STUB_PORT" \
+            --dir "$SAMPLE_DIR" \
+            --source linux-sh-e2e \
+            --max-age 30 \
+            --max-size-kb 50000 \
+            --sync
+
+      - name: Run Python collector  # same sample directory, source tag linux-py-e2e
+        run: |
+          python3 ./scripts/thunderstorm-collector.py \
+            -s 127.0.0.1 \
+            -p "$STUB_PORT" \
+            -d "$SAMPLE_DIR" \
+            -S linux-py-e2e
+
+      - name: Run Perl collector  # same sample directory, source tag linux-pl-e2e
+        run: |
+          perl ./scripts/thunderstorm-collector.pl \
+            --dir "$SAMPLE_DIR" \
+            --server 127.0.0.1 \
+            --port "$STUB_PORT" \
+            --source linux-pl-e2e
+
+      - name: Verify uploaded file integrity  # requires at least 3 uploads matching EXPECTED_SHA256; three collectors ran above
+        run: |
+          python3 ./scripts/tests/verify_uploads.py \
+            --uploads-dir "$UPLOADS_DIR" \
+            --expected-sha256 "$EXPECTED_SHA256" \
+            --min-count 3 \
+            --timeout-seconds 120
+
+      - name: Stop thunderstorm-stub-server  # kills the PID recorded by the start step; a failed kill is ignored
+        if: always()  # run this cleanup even when an earlier step failed
+        run: |
+          if [ -f "$RUNNER_TEMP/stub-server.pid" ]; then
+            kill "$(cat "$RUNNER_TEMP/stub-server.pid")" || true
+          fi
+
+      - name: Print stub server log  # dumps the server's captured stdout/stderr when the log file exists
+        if: always()  # print the log on failures too, to aid debugging
+        run: |
+          if [ -n "${STUB_LOG:-}" ] && [ -f "$STUB_LOG" ]; then
+            echo "==== thunderstorm-stub-server log ===="
+            cat "$STUB_LOG"
+          fi
+
+  windows-collectors:  # runs the PowerShell and Batch collectors against a local stub server, then verifies the uploads
+    name: Windows scripts (PowerShell/Batch)
+    runs-on: windows-latest
+    env:  # job-level variables, visible to every step below
+      SAMPLE_URL: https://github.com/TwoSevenOneT/EDR-Freeze/releases/download/v1.0-fbd43cf/EDRFreeze-msvc.exe
+      STUB_PORT: "18080"  # port passed to the stub server and to each collector; quoted to keep it a string
+    steps:
+      - uses: actions/checkout@v4  # this repository; provides .\scripts used by the steps below
+
+      - name: Setup Go  # Go toolchain is needed to build the stub server
+        uses: actions/setup-go@v5
+        with:
+          go-version: stable
+
+      - name: Setup Python  # used to run scripts\tests\verify_uploads.py in the last step
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+
+      - name: Checkout thunderstorm-stub-server  # second checkout into .\thunderstorm-stub-server; no ref is pinned
+        uses: actions/checkout@v4
+        with:
+          repository: Nextron-Labs/thunderstorm-stub-server
+          path: thunderstorm-stub-server
+
+      - name: Build thunderstorm-stub-server  # binary is written to %RUNNER_TEMP%\thunderstorm-stub-server.exe
+        shell: pwsh
+        working-directory: thunderstorm-stub-server
+        run: go build -o "$env:RUNNER_TEMP\thunderstorm-stub-server.exe" .
+
+      - name: Prepare test sample and directories  # creates sample/upload dirs under C:\, downloads the sample, exports SAMPLE_DIR / UPLOADS_DIR / EXPECTED_SHA256
+        shell: pwsh
+        run: |
+          $sampleDir = "C:\ts-e2e-sample"
+          $uploadsDir = "C:\ts-e2e-uploads"
+          New-Item -ItemType Directory -Path $sampleDir -Force | Out-Null
+          New-Item -ItemType Directory -Path $uploadsDir -Force | Out-Null
+          $samplePath = Join-Path $sampleDir "EDRFreeze-msvc.exe"
+          Invoke-WebRequest -Uri $env:SAMPLE_URL -OutFile $samplePath
+          $hash = (Get-FileHash -Path $samplePath -Algorithm SHA256).Hash.ToLower()
+          "SAMPLE_DIR=$sampleDir" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+          "UPLOADS_DIR=$uploadsDir" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+          "EXPECTED_SHA256=$hash" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
+
+      - name: Run Windows collectors against thunderstorm-stub-server  # one script: start server, wait for /api/status, run both collectors, verify (min 2 uploads), always stop server and print logs
+        shell: pwsh
+        run: |
+          $stdoutLogFile = Join-Path $env:RUNNER_TEMP "stub-server.stdout.log"
+          $stderrLogFile = Join-Path $env:RUNNER_TEMP "stub-server.stderr.log"
+          $auditFile = Join-Path $env:RUNNER_TEMP "stub-audit.jsonl"
+          $proc = $null
+          try {
+            $proc = Start-Process `
+              -FilePath "$env:RUNNER_TEMP\thunderstorm-stub-server.exe" `
+              -ArgumentList @("--port", $env:STUB_PORT, "--uploads-dir", $env:UPLOADS_DIR, "--log-file", $auditFile) `
+              -RedirectStandardOutput $stdoutLogFile `
+              -RedirectStandardError $stderrLogFile `
+              -PassThru
+
+            $ready = $false
+            for ($i = 0; $i -lt 60; $i++) {
+              try {
+                Invoke-RestMethod -Uri "http://127.0.0.1:$($env:STUB_PORT)/api/status" | Out-Null
+                $ready = $true
+                break
+              } catch {
+                Start-Sleep -Seconds 1
+              }
+            }
+            if (-not $ready) {
+              throw "Stub server did not become ready in time"
+            }
+
+            powershell.exe -NoProfile -ExecutionPolicy Bypass -File .\scripts\thunderstorm-collector.ps1 `
+              -ThunderstormServer 127.0.0.1 `
+              -ThunderstormPort $env:STUB_PORT `
+              -Folder $env:SAMPLE_DIR `
+              -Source windows-ps-e2e `
+              -MaxAge 30 `
+              -MaxSize 100
+            if ($LASTEXITCODE -ne 0) {
+              throw "PowerShell collector failed with exit code $LASTEXITCODE"
+            }
+
+            $env:THUNDERSTORM_SERVER = "127.0.0.1"
+            $env:THUNDERSTORM_PORT = $env:STUB_PORT
+            $env:URL_SCHEME = "http"
+            $env:COLLECT_DIRS = $env:SAMPLE_DIR
+            $env:RELEVANT_EXTENSIONS = ".exe"
+            $env:COLLECT_MAX_SIZE = "50000000"
+            $env:MAX_AGE = "30"
+            $env:SOURCE = "windows-bat-e2e"
+            $env:DEBUG = "1"
+            cmd /c scripts\thunderstorm-collector.bat
+            if ($LASTEXITCODE -ne 0) {
+              throw "Batch collector failed with exit code $LASTEXITCODE"
+            }
+
+            python .\scripts\tests\verify_uploads.py `
+              --uploads-dir "$env:UPLOADS_DIR" `
+              --expected-sha256 "$env:EXPECTED_SHA256" `
+              --min-count 2 `
+              --timeout-seconds 180
+            if ($LASTEXITCODE -ne 0) {
+              throw "Upload verification failed with exit code $LASTEXITCODE"
+            }
+          } finally {
+            if ($proc) {
+              Stop-Process -Id $proc.Id -Force -ErrorAction SilentlyContinue
+            }
+            if (Test-Path $stdoutLogFile) {
+              Write-Host "==== thunderstorm-stub-server stdout ===="
+              Get-Content -Path $stdoutLogFile
+            }
+            if (Test-Path $stderrLogFile) {
+              Write-Host "==== thunderstorm-stub-server stderr ===="
+              Get-Content -Path $stderrLogFile
+            }
+            if (Test-Path $auditFile) {
+              Write-Host "==== thunderstorm-stub-server audit jsonl ===="
+              Get-Content -Path $auditFile
+            }
+          }
diff --git a/.gitignore b/.gitignore
index b67f2fe..f2d3aff 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@ curl.exe
 settings.json
 *.log
 go/dist/
+__pycache__/
diff --git a/scripts/AUDIT.md b/scripts/AUDIT.md
new file mode 100644
index 0000000..2aef35d
--- /dev/null
+++ b/scripts/AUDIT.md
@@ -0,0 +1,321 @@
+# Script Collector Audit — Bugs, Inconsistencies & Hardening Opportunities
+
+Audit of all four script collectors against the bash collector (`script-robustness` branch)
+and the Go collector as reference implementation.
+
+---
+
+## 🔴 Bugs
+
+### 1. Python: `source` parameter not URL-encoded
+**File:** `thunderstorm-collector.py`, line ~148
+```python
+source = f"?source={args.source}"
+```
+**Impact:** Source names with spaces, `&`, `#`, or other URL-special characters will corrupt the
+query string or silently truncate the source value at the server.
+
+**Compare:** The bash collector has `urlencode()`, the Go collector uses `url.QueryEscape()`,
+PowerShell uses `[uri]::EscapeDataString()`. Python lacks this (as does Perl, see bug #7).
+
+**Fix:**
+```python
+from urllib.parse import quote
+source = f"?source={quote(args.source)}"
+```
+
+---
+
+### 2. Python: `Content-Disposition` filename not sanitized
+**File:** `thunderstorm-collector.py`, `submit_sample()`
+```python
+f'Content-Disposition: form-data; name="file"; filename="{filepath}"\r\n'
+```
+**Impact:** Filenames containing `"`, `\r`, `\n`, or `;` will break the multipart header,
+causing malformed requests or server-side parse errors. The file path is inserted verbatim.
+
+**Compare:** Bash has `sanitize_filename_for_multipart()`. Python and Perl do not sanitize.
+
+**Fix:**
+```python
+safe = filepath.replace('"', '_').replace(';', '_').replace('\\', '/')
+```
+
+---
+
+### 3. Python: `num_submitted` incremented even on failure
+**File:** `thunderstorm-collector.py`, `submit_sample()`, line ~100
+```python
+# Inside the retry loop, after conn.request():
+...
+global num_submitted
+num_submitted += 1   # ← This runs even if all retries failed
+```
+**Impact:** The final "Submitted" count is inflated — every file that enters `submit_sample()`
+is counted, regardless of whether it was actually accepted. Makes monitoring/reporting unreliable.
+
+**Compare:** Bash only increments on `submit_file` returning 0. Go tracks success/failure separately.
+
+**Fix:** Move the increment inside the `elif resp.status == 200: break` branch.
+
+---
+
+### 4. Python: `os.chdir()` in `process_dir()` is dangerous
+**File:** `thunderstorm-collector.py`, `process_dir()`
+```python
+os.chdir(workdir)
+...
+os.chdir(startdir)
+```
+**Impact:** `os.chdir()` changes the process-global working directory. If an exception occurs
+between the two `chdir()` calls, the CWD is left in an arbitrary directory. Also makes the
+function non-thread-safe (though single-threaded currently). If `workdir` disappears mid-walk
+(temp files), the function will crash and orphan the CWD.
+
+**Compare:** Bash uses `find -print0` (no chdir). Go uses `filepath.Walk()`. Perl also uses
+`chdir()` with the same risk.
+
+**Fix:** Use `os.path.join()` with absolute paths instead of `chdir()`. Or use `os.scandir()`/
+`os.walk()` which don't require changing CWD.
+
+---
+
+### 5. Perl: String comparison used for numeric size check
+**File:** `thunderstorm-collector.pl`, line ~100
+```perl
+if ( ( $size / 1024 / 1024 ) gt $max_size ) {
+```
+**Impact:** `gt` is the string comparison operator, not numeric. This does lexicographic
+comparison: `"9" gt "10"` is **true** (because `"9"` > `"1"` lexically). So a 9MB file
+would be skipped with `max_size=10`. Files between 1-9 MB would be incorrectly compared
+against multi-digit limits.
+
+**Fix:**
+```perl
+if ( ( $size / 1024 / 1024 ) > $max_size ) {
+```
+
+---
+
+### 6. Perl: String comparison for age check
+**File:** `thunderstorm-collector.pl`, line ~107
+```perl
+if ( $mdate lt ( $current_date - ($max_age * 86400) ) ) {
+```
+**Impact:** Same issue — `lt` is string comparison. Works coincidentally for large epoch
+timestamps (since they're all the same length currently), but will break in edge cases
+and is semantically wrong.
+
+**Fix:**
+```perl
+if ( $mdate < ( $current_date - ($max_age * 86400) ) ) {
+```
+
+---
+
+### 7. Perl: `source` parameter not URL-encoded
+**File:** `thunderstorm-collector.pl`, line ~47
+```perl
+$source = "?source=$source";
+```
+**Impact:** Same as Python bug #1. Source names with spaces or special characters corrupt the URL.
+
+**Fix:**
+```perl
+use URI::Escape;
+$source = "?source=" . uri_escape($source);
+```
+Or without additional module:
+```perl
+$source =~ s/([^A-Za-z0-9\-_.~])/sprintf("%%%02X", ord($1))/ge;
+$source = "?source=$source";
+```
+
+---
+
+### 8. Perl: `num_submitted` incremented even on failure
+**File:** `thunderstorm-collector.pl`, `submitSample()`, line ~127
+```perl
+$num_submitted++;
+```
+**Impact:** Incremented inside the eval block after `$ua->post()`, but before checking
+`$req->is_success`. Also incremented even if the request threw an exception caught by
+`eval`. The final count is inflated.
+
+**Fix:** Only increment when `$successful` is true:
+```perl
+if ($successful) {
+    $num_submitted++;
+    last;
+}
+```
+
+---
+
+### 9. Python: `retry_time` from header is a string, passed to `sleep()` without conversion
+**File:** `thunderstorm-collector.py`, `submit_sample()`
+```python
+retry_time = resp.headers.get("Retry-After", 30)
+time.sleep(retry_time)
+```
+**Impact:** `resp.headers.get()` returns a **string** (e.g. `"30"`). `time.sleep()` does not
+accept a string in Python 3 and will raise `TypeError`. The fallback value `30` is an int and
+would work, but the actual header value will crash.
+
+**Fix:**
+```python
+retry_time = int(resp.headers.get("Retry-After", 30))
+```
+
+---
+
+### 10. Python: `--port` has no default value
+**File:** `thunderstorm-collector.py`, argparse definition
+```python
+parser.add_argument("-p", "--port", help="Port of the THOR Thunderstorm server. (Default: 8080)")
+```
+**Impact:** Despite the help text saying "Default: 8080", no `default=` is set. If `--port`
+is omitted, `args.port` is `None`, and the URL becomes `http://server:None/api/checkAsync`.
+The HTTP connection will fail with a confusing error.
+
+**Fix:**
+```python
+parser.add_argument("-p", "--port", type=int, default=8080, ...)
+```
+
+---
+
+## 🟡 Inconsistencies Between Collectors
+
+### 11. Filename in multipart: basename vs full path
+| Collector | Filename sent |
+|-----------|---------------|
+| **Bash** (fixed) | Full path (`/usr/sbin/nft`) |
+| **Go** | Full path (`filepath.Abs`) |
+| **Python** | Full path (but unsanitized) |
+| **Perl** | Basename only (LWP::UserAgent default) |
+| **PowerShell** | Full path (`$_.FullName`) |
+| **Batch** | Relative path (`%%F` from `FOR /R .`) |
+
+The Perl collector uses `LWP::UserAgent->post()` with `[ "file" => [ $filepath ] ]`, but
+LWP sends only the basename by default. This means the server audit log loses the original path.
+
+**Fix for Perl:**
+```perl
+Content => [ "file" => [ $filepath, $filepath ] ],
+# Second arg to arrayref is the filename override
+```
+
+---
+
+### 12. Max-age defaults vary wildly
+| Collector | Default max-age |
+|-----------|----------------|
+| Bash | 14 days |
+| Go | none (all files) |
+| Python | 14 days |
+| Perl | **3 days** |
+| PowerShell | **0** (disabled) |
+| Batch | **30 days** |
+
+Not necessarily a bug, but worth harmonizing. A 3-day default in Perl is very aggressive
+and will miss most files in forensic scenarios.
+
+---
+
+### 13. Max-size defaults vary
+| Collector | Default max-size |
+|-----------|-----------------|
+| Bash | 2 MB |
+| Go | 100 MB |
+| Python | 20 MB |
+| Perl | 10 MB |
+| PowerShell | 20 MB |
+| Batch | ~3 MB |
+
+The Go collector is 50x more generous than bash. Forensic users scanning for large
+executables may miss files with the script collectors.
+
+---
+
+### 14. Retry behavior varies
+| Collector | 503 retry | Error retry | Backoff |
+|-----------|-----------|-------------|---------|
+| Bash | Yes (Retry-After) | 3 retries, exp backoff | 2×2^n |
+| Go | Yes (Retry-After) | 3 retries, exp backoff | 4×2^n |
+| Python | Yes (but crashes, bug #9) | 3 retries, exp backoff | 2×2^n |
+| Perl | No 503 handling | 4 retries, exp backoff | 2×2^n |
+| PowerShell | Yes (Retry-After) | 3 retries, exp backoff | 2×2^n |
+| Batch | **No retry at all** | No | No |
+
+**Perl** doesn't handle HTTP 503 at all — it will count a 503 as a submission even though
+`$req->is_success` is false, because `$num_submitted` is incremented anyway (bug #8), and
+it doesn't sleep or retry.
+
+---
+
+## 🔵 Hardening Opportunities
+
+### 15. Python: No validation of CLI arguments
+No checks for empty server, invalid port, negative max-age, etc. Contrast with bash
+which validates all numeric params.
+
+### 16. Perl: No `--max-age` or `--max-size` CLI flags
+These are hardcoded variables (`$max_age = 3`, `$max_size = 10`) with no command-line
+override. Users must edit the script to change them.
+
+### 17. Perl: `chdir()` without error recovery
+Same as Python bug #4. If `chdir` fails partway through recursion, the CWD is corrupted
+for all subsequent operations. The `chdir($startdir) or die` at the end of the loop is
+inside the foreach, not a finally/cleanup block.
+
+### 18. Python: `os.listdir()` instead of `os.walk()`
+The manual recursion with `os.listdir()` + `os.chdir()` reimplements what `os.walk()` does
+safely. Switching would eliminate bug #4 and simplify the code.
+
+### 19. Batch: Fire-and-forget uploads (`START /B curl`)
+```batch
+START /B curl -F file=@%%F ... -o nul -s ...
+```
+Uploads run as background processes with output discarded. No error checking, no retry,
+no submission count. If the server is down, every upload silently fails.
+
+### 20. PowerShell: Extensions hardcoded in script body, overwriting parameter
+```powershell
+param( ... [string[]]$Extensions ... )
+# Then later in "Presets":
+[string[]]$Extensions = @('.asp','.vbs', ...)  # Overwrites the param!
+```
+The parameter `$Extensions` from the command line is **overwritten** by the preset
+assignment on line ~117. Users cannot actually filter by extension via the CLI.
+
+### 21. PowerShell: Infinite retry on 503
+```powershell
+while ( $($StatusCode) -ne 200 ) {
+    ...
+    if ( $StatusCode -eq 503 ) {
+        # sleeps and retries forever
+    }
+}
+```
+There's no retry limit for 503. If the server is permanently overloaded, the collector
+hangs on a single file indefinitely. Non-503 errors have a 3-retry limit, but 503 does not.
+
+---
+
+## Summary
+
+| Severity | Count | Collectors affected |
+|----------|-------|-------------------|
+| 🔴 Bug | 10 | Python (6), Perl (4) |
+| 🟡 Inconsistency | 4 | All |
+| 🔵 Hardening | 7 | Python (2), Perl (2), Batch (1), PowerShell (2) |
+
+### Priority fixes (high impact, low effort):
+1. **Python: URL-encode source** — one line
+2. **Python: fix port default** — one line
+3. **Python: fix Retry-After type** — one line
+4. **Perl: `gt`→`>` and `lt`→`<`** — two characters each
+5. **Perl: URL-encode source** — one line
+6. **Python/Perl: fix submitted count** — move increment
+7. **PowerShell: Extensions preset overwrites param** — remove preset or use `if (!$PSBoundParameters.ContainsKey('Extensions'))`
diff --git a/scripts/README.md b/scripts/README.md
index 0b76e92..0dbfff3 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -1,134 +1,469 @@
 # THOR Thunderstorm Collector Scripts
 
-The Thunderstorm collector script library is a library of script examples that you can use for sample collection purposes.
+Lightweight, dependency-minimal scripts for collecting and submitting file samples to a [THOR Thunderstorm](https://www.nextron-systems.com/thor-thunderstorm/) server for YARA-based scanning.
 
-## thunderstorm-collector Shell Script
+Designed for forensic triage, incident response, and continuous monitoring — often on systems where installing a full agent is impractical or undesirable.
 
-A shell script for Linux.
+## Cross-Platform Test Matrix
 
-### Requirements
+All collectors are tested against a comprehensive matrix of operating systems and environments:
 
-- bash
-- wget
+### Linux Containers (podman/Docker)
 
-### Usage
+| Distro | Bash | Ash/sh | Python3 | Perl |
+|--------|------|--------|---------|------|
+| Alpine Linux | ✅ | ✅ | ✅ | ✅ |
+| Debian | ✅ | ✅ | ✅ | ✅ |
+| Ubuntu 22.04 | ✅ | ✅ | ✅ | ✅ |
+| Fedora | ✅ | ✅ | ✅ | ✅ |
+| CentOS Stream 9 | ✅ | ✅ | ✅ | ✅ |
+| Arch Linux | ✅ | ✅ | ✅ | ✅ |
+| openSUSE Tumbleweed | ✅ | ✅ | ✅ | ✅ |
+| Amazon Linux 2023 | ✅ | ✅ | ✅ | ✅ |
+| Rocky Linux 9 | ✅ | ✅ | ✅ | ✅ |
 
-You can run it like:
+### BSD VMs
+
+| OS | Bash | sh | Python3 | Perl |
+|----|------|-----|---------|------|
+| FreeBSD 14.3 | ✅ | ✅ | ✅ | ✅ |
+| OpenBSD 7.8 | ✅ | ✅ | — | ✅ |
+
+### ARM / Embedded
+
+| Device | OS | Bash | sh | Python3 | Perl |
+|--------|-----|------|-----|---------|------|
+| Raspberry Pi 5 (aarch64) | Debian 13 (trixie) | ✅ | ✅ | ✅ | ✅ |
+
+**Total: 47 tests, 47 passing** (tested 2025-02-25)
+
+---
+
+## Quick Start
 
 ```bash
-bash ./thunderstorm-collector.sh
-```
+# Linux/macOS — Bash
+bash thunderstorm-collector.sh --server thunderstorm.local --dir /home
+
+# Embedded Linux / BusyBox / Alpine — POSIX sh
+sh thunderstorm-collector-ash.sh --server thunderstorm.local --dir /tmp
+
+# Cross-platform — Python 3
+python3 thunderstorm-collector.py -s thunderstorm.local -d /home
+
+# Legacy systems — Python 2
+python thunderstorm-collector-py2.py -s thunderstorm.local -d /home
 
-The most common use case would be a collector script that looks e.g. for files that have been created or modified within the last X days and runs every X days.
+# Unix with Perl
+perl thunderstorm-collector.pl -s thunderstorm.local --dir /home
 
-### Tested On
+# Windows — PowerShell 3+
+powershell.exe -ep bypass .\thunderstorm-collector.ps1 -ThunderstormServer thunderstorm.local
 
-Successfully tested on:
+# Windows — PowerShell 2+
+powershell.exe -ep bypass .\thunderstorm-collector-ps2.ps1 -ThunderstormServer thunderstorm.local
 
-- Debian 10
+# Windows — Batch (legacy)
+thunderstorm-collector.bat
+```
+
+## Choosing the Right Collector
 
-## thunderstorm-collector Batch Script
+| Scenario | Recommended Collector |
+|---|---|
+| Modern Linux server or workstation | `thunderstorm-collector.sh` (Bash) |
+| macOS (any version) | `thunderstorm-collector.sh` (Bash) |
+| Embedded Linux / BusyBox / router / IoT | `thunderstorm-collector-ash.sh` (POSIX sh) |
+| Alpine Docker container | `thunderstorm-collector-ash.sh` (POSIX sh) |
+| Cross-platform, single script | `thunderstorm-collector.py` (Python 3) |
+| Legacy Linux (RHEL/CentOS 7, Debian 7/8) | `thunderstorm-collector-py2.py` (Python 2) |
+| Solaris, AIX, HP-UX | `thunderstorm-collector.pl` (Perl) |
+| Windows 7+ / Server 2008 R2+ (PS 3+) | `thunderstorm-collector.ps1` |
+| Windows 7 / Server 2008 R2 (PS 2) | `thunderstorm-collector-ps2.ps1` |
+| Windows XP / Server 2003 / no PowerShell | `thunderstorm-collector.bat` |
 
-A Batch script for Windows.
+---
 
-Warning: The FOR loop used in the Batch script tends to [leak memory](https://stackoverflow.com/questions/6330519/memory-leak-in-batch-for-loop). We couldn't figure out a clever hack to avoid this behaviour and therefore recommend using the Go based Thunderstorm Collector on Windows systems.
+## Collector Reference
 
-### Requirements
+### Bash Collector — `thunderstorm-collector.sh`
 
-- curl (Download [here](https://curl.haxx.se/windows/))
+The most feature-complete Linux/macOS collector. Supports both `curl` and `wget` as upload backends with automatic detection and fallback.
 
-#### Note on Windows 10
+**Use on:** Linux servers, workstations, macOS, WSL, any system with Bash 3.2+.
 
-Windows 10 already includes a curl since build 17063, so all versions newer than version 1709 (Redstone 3) from October 2017 already meet the requirements
+| Requirement | Detail |
+|---|---|
+| Shell | Bash 3.2+ |
+| Upload tool | `curl` or `wget` (at least one) |
+| TLS | Via curl/wget flags (`--ssl`) |
 
-#### Note on very old Windows versions
+**Features:**
+- Automatic curl/wget detection and fallback
+- Retry with exponential backoff (configurable)
+- Safe handling of filenames with spaces, quotes, and special characters (`find -print0`)
+- URL-encoded source identifiers
+- Syslog integration (`--syslog`), log file output (`--log-file`), dry-run mode (`--dry-run`)
 
-The last version of curl that works with Windows 7 / Windows 2008 R2 and earlier is v7.46.0 and can be still be downloaded from [here](https://bintray.com/vszakats/generic/download_file?file_path=curl-7.46.0-win32-mingw.7z)
+**Limitations:**
+- Not compatible with `ash`, `dash`, or plain `sh` — uses Bash arrays, `${var//pattern}`, `read -d ''`, C-style for loops
+- Requires `curl` or `wget` as external dependency
 
-### Usage
+**Tested Environments:**
 
-You can run it like:
+| Environment | Bash | curl | wget | Result |
+|---|---|---|---|---|
+| Fedora 43 | 5.2 | ✅ | ✅ | ✅ 28/28 tests, 10/10 files |
+| CentOS 7 | 4.2 | ✅ | ✅ | ✅ 10/10 files |
+| Debian 9 (Stretch) | 4.4 | ✅ | ✅ | ✅ 10/10 files |
+| Alpine 3.18 | 5.2 | ✅ | ✅ | ✅ 10/10 files |
+| Bash 3.2 (compiled, macOS-equivalent) | 3.2 | ✅ | ✅ | ✅ 10/10 files |
 
+**Usage:**
 ```bash
-thunderstorm-collector.bat
+bash thunderstorm-collector.sh --server thunderstorm.local
+bash thunderstorm-collector.sh --server 10.0.0.5 --ssl --dir /home --dir /tmp --max-age 7
+bash thunderstorm-collector.sh --help
+```
+
+---
+
+### POSIX sh / ash Collector — `thunderstorm-collector-ash.sh`
+
+A POSIX-compliant rewrite that runs on any Bourne-compatible shell. Designed for minimal environments where Bash is unavailable.
+
+**Use on:** BusyBox-based firmware, Alpine Docker containers, embedded Linux, network appliances, routers, IoT devices, stripped-down VMs.
+
+| Requirement | Detail |
+|---|---|
+| Shell | Any POSIX sh (`ash`, `dash`, `busybox sh`, `ksh`) |
+| Upload tool | `curl`, `wget`, or `nc` (at least one) |
+| Utilities | `find`, `wc`, `od`, `tr`, `sed`, `grep` (standard POSIX) |
+| TLS | Via curl/wget flags (`--ssl`) |
+
+**Features:**
+- Same CLI interface, retry logic, logging, and syslog support as the Bash collector
+- Three upload tools (`curl`, `wget`, `nc`) with automatic detection, tried in this order: `curl` → GNU `wget` → `nc` → BusyBox `wget`
+- URL-encoding via `od` + POSIX arithmetic (no Bash constructs)
+
+**Limitations:**
+- Filenames containing literal newline characters (`\n`) are not supported — the Bash version handles this via `find -print0` + `read -d ''`, which requires Bash. Extremely rare in practice.
+- BusyBox `wget --post-file` truncates binary files at the first NUL byte (0x00). The collector detects this and prefers `nc` automatically. If neither `curl` nor `nc` is available, BusyBox `wget` is used with a warning.
+
+**Tested Environments:**
+
+| Environment | Shell | curl | nc | wget | Result |
+|---|---|---|---|---|---|
+| BusyBox 1.36 | ash | — | ✅ | ⚠️ truncates | ✅ 10/10 files (via nc) |
+| Alpine 3.18 | ash | ✅ | ✅ | ✅ | ✅ 10/10 files |
+| Fedora 43 | dash | ✅ | ✅ | ✅ | ✅ 10/10 files |
+| Debian 9 (Stretch) | dash | ✅ | ✅ | ✅ | ✅ 10/10 files |
+
+**Usage:**
+```sh
+sh thunderstorm-collector-ash.sh --server thunderstorm.local
+sh thunderstorm-collector-ash.sh --server 10.0.0.5 --dir /var --dir /tmp --max-age 7
 ```
 
-### Tested On
+---
 
-Successfully tested on:
+### Python 3 Collector — `thunderstorm-collector.py`
 
-- Windows 10
-- Windows 2003
-- Windows XP
+Cross-platform collector using only the Python 3 standard library. No external packages required.
 
-## thunderstorm-collector PowerShell Script
+**Use on:** Any system with Python 3.4+ — Linux, macOS, Windows, BSD, Solaris. Good default choice when Python is available and you want a single script that works everywhere.
 
-A PowerShell script for Windows.
+| Requirement | Detail |
+|---|---|
+| Runtime | Python 3.4+ |
+| Dependencies | None (stdlib only: `http.client`, `ssl`, `mimetypes`) |
+| TLS | Built-in (`--tls`, `--insecure`) |
 
-### Requirements
+**Features:**
+- Built-in HTTP/HTTPS client (no curl/wget needed)
+- TLS with certificate verification or `--insecure` mode
+- Multipart form-data upload, URL-encoded source identifiers
+- Configurable skip patterns (regex), directory exclusions, file size/age limits
 
-- PowerShell version 3
+**Limitations:**
+- Python 2 not supported — use `thunderstorm-collector-py2.py` instead
+- Skip patterns and directory exclusions are configured in source code, not CLI flags
+- No syslog integration
 
-### Usage
+**Tested Environments:**
 
-You can run it like:
+| Environment | Python | Result |
+|---|---|---|
+| Fedora 43 | 3.14 | ✅ 10/10 files |
+| Alpine 3.18 | 3.11 | ✅ 10/10 files |
+| CentOS 7 | 3.6 | ✅ 10/10 files |
+| Debian 9 (Stretch) | 3.5 | ✅ 10/10 files (requires .format(), f-strings removed) |
 
+**Usage:**
 ```bash
-powershell.exe -ep bypass .\thunderstorm-collector.ps1
+python3 thunderstorm-collector.py -s thunderstorm.local -d /home -d /tmp
+python3 thunderstorm-collector.py -s thunderstorm.local -p 443 -t -k  # HTTPS, skip cert verify
 ```
 
-Collect files from a certain directory
+---
+
+### Python 2 Collector — `thunderstorm-collector-py2.py`
+
+Functionally equivalent to the Python 3 collector, using Python 2 standard library modules (`httplib`, `urllib`).
+
+**Use on:** Legacy systems where Python 3 is unavailable — RHEL/CentOS 6–7, Debian 7/8, older Solaris, AIX. Python 2 reached end-of-life in January 2020; prefer the Python 3 version when possible.
+
+| Requirement | Detail |
+|---|---|
+| Runtime | Python 2.7+ |
+| Dependencies | None (stdlib only: `httplib`, `urllib`, `ssl`) |
+| TLS | Built-in; full support requires Python 2.7.9+ (SNI, cert verification) |
+
+**Features:**
+- Same feature set as the Python 3 collector
+- Graceful TLS fallback for Python 2.7.0–2.7.8 (connects without SNI/cert verification)
+- Version guard: exits with a clear error if accidentally run under Python 3
 
+**Limitations:**
+- TLS on Python 2.7.0–2.7.8: connects but without SNI or certificate verification (limited by the `ssl` module)
+- Same configuration limitations as the Python 3 version
+
+**Tested Environments:**
+
+| Environment | Python | TLS | Result |
+|---|---|---|---|
+| CentOS 7 | 2.7.5 | ⚠️ no SNI (pre-2.7.9) | ✅ |
+
+**Usage:**
 ```bash
-powershell.exe -ep bypass .\thunderstorm-collector.ps1 -ThunderstormServer my-thunderstorm.local -Folder C:\ProgramData\Suspicious
+python thunderstorm-collector-py2.py -s thunderstorm.local -d /home
+python thunderstorm-collector-py2.py -s thunderstorm.local -p 443 -t -k
 ```
 
-Collect all files created within the last 24 hours from partition C:\
+---
+
+### Perl Collector — `thunderstorm-collector.pl`
+
+**Use on:** Unix/Linux systems where Perl is available but Python and Bash may not be. Common on older Solaris, AIX, HP-UX, and hardened systems that strip other scripting languages.
+
+| Requirement | Detail |
+|---|---|
+| Runtime | Perl 5.16+ |
+| Dependencies | `LWP::UserAgent` (from libwww-perl; not part of the Perl core distribution) |
+| TLS | Via LWP SSL configuration |
+
+**Features:**
+- Multipart form-data upload via LWP
+- URL-encoded source identifiers
+- Recursive directory scanning with configurable age and size limits
+- Debug mode
+
+**Limitations:**
+- Requires `LWP::UserAgent` (`apt-get install libwww-perl` / `yum install perl-libwww-perl`)
+- No retry logic on upload failure
+- Configuration (skip patterns, extensions, size/age limits) is in source code, not CLI flags
+- No syslog integration
+
+**Tested Environments:**
+
+| Environment | Perl | LWP | Result |
+|---|---|---|---|
+| Fedora 43 | 5.40 | ✅ | ✅ 10/10 files |
+| CentOS 7 | 5.16 | ✅ | ✅ 10/10 files |
+| Debian 9 (Stretch) | 5.24 | ✅ | ✅ 10/10 files |
+| Alpine 3.18 | 5.36 | ✅ | ✅ 10/10 files |
 
+**Usage:**
 ```bash
-powershell.exe -ep bypass .\thunderstorm-collector.ps1 -ThunderstormServer my-thunderstorm.local -MaxAge 1
+perl thunderstorm-collector.pl -s thunderstorm.internal.net
+perl thunderstorm-collector.pl --dir /home --server thunderstorm.internal.net --debug
 ```
 
-### Configuration
+---
 
-Please review the configuration section in the PowerShell script for more settings.
+### PowerShell 3+ Collector — `thunderstorm-collector.ps1`
 
-### Tested On
+**Use on:** Windows 7 SP1+, Windows Server 2008 R2 SP1+ — any system with PowerShell 3.0 or newer. This covers most modern Windows deployments.
 
-Successfully tested on:
+| Requirement | Detail |
+|---|---|
+| Runtime | PowerShell 3.0+ |
+| Dependencies | None |
+| TLS | Built-in (`-UseSSL` flag, enforces TLS 1.2+) |
 
-- Windows 10
-- Windows 7
+**Features:**
+- Recursive file scanning with extension, age, and size filtering
+- HTTPS support with TLS 1.2/1.3 enforcement (`-UseSSL`)
+- Source identifier for audit trail
+- Debug output (`-Debugging`)
+- Log file output
+- Retry with exponential backoff, 503 back-pressure handling with `Retry-After`
+- Auto-detection of Microsoft Defender ATP Live Response environment
 
-## thunderstorm-collector Perl Script
+**Limitations:**
+- PowerShell 2.0 is not supported — use `thunderstorm-collector-ps2.ps1` instead
+- Uses `Invoke-WebRequest` with `-UseBasicParsing`
 
-A Perl script collector.
+**Tested Environments:**
 
-### Requirements
+| Environment | PowerShell | .NET | Upload Integrity | Result |
+|---|---|---|---|---|
+| Windows 11 | 5.1.26100 | 4.x | ✅ MD5 verified (512KB binary w/ NUL bytes) | ✅ |
+| Fedora 43 (pwsh) | 7.4.6 | — | ✅ MD5 verified | ✅ |
 
-- Perl version 5
-- LWP::UserAgent
+**Usage:**
+```powershell
+# Basic scan
+powershell.exe -ep bypass .\thunderstorm-collector.ps1 -ThunderstormServer thunderstorm.local
 
-### Usage
+# HTTPS with TLS
+powershell.exe -ep bypass .\thunderstorm-collector.ps1 -ThunderstormServer thunderstorm.local -UseSSL
 
-You can run it like:
+# Scan specific folder, files modified in last 24 hours
+powershell.exe -ep bypass .\thunderstorm-collector.ps1 -ThunderstormServer ts.local -Folder C:\ProgramData -MaxAge 1
 
-```bash
-perl thunderstorm-collector.pl -- -s thunderstorm.internal.net
+# Debug mode
+powershell.exe -ep bypass .\thunderstorm-collector.ps1 -ThunderstormServer ts.local -Debugging
+```
+
+---
+
+### PowerShell 2+ Collector — `thunderstorm-collector-ps2.ps1`
+
+A PowerShell 2.0–compatible variant using `System.Net.HttpWebRequest` instead of `Invoke-WebRequest` (which was introduced in PowerShell 3.0).
+
+**Use on:** Windows 7 (pre-SP1 or without WMF 3.0 update), Windows Server 2008 R2 (pre-SP1), or any environment where PowerShell 2.0 is the only option and cannot be upgraded. Also works on all newer PowerShell versions.
+
+| Requirement | Detail |
+|---|---|
+| Runtime | PowerShell 2.0+ |
+| Dependencies | None |
+| TLS | Built-in (`-UseSSL` flag); requires .NET 4.5+ for TLS 1.2 |
+
+**Features:**
+- Same scanning and filtering as the PS 3+ version
+- Raw byte stream upload via `HttpWebRequest.GetRequestStream()` — no encoding layer, binary-safe
+- HTTPS with TLS 1.2+ enforcement via numeric `SecurityProtocol` enum values (works without .NET 4.5 type names)
+- Retry with exponential backoff, 503 back-pressure with `Retry-After`
+- PS 2–compatible file enumeration (`Where-Object { -not $_.PSIsContainer }` instead of `-File`)
+
+**Limitations:**
+- TLS 1.2 requires .NET Framework 4.5 or newer installed on the system. Windows 7 RTM ships with .NET 3.5; if .NET 4.5 is not installed, HTTPS connections will fail
+- No auto-detection of MDATP Live Response environment (rare on PS 2 systems)
+
+**Tested Environments:**
+
+| Environment | PowerShell | .NET | Upload Integrity | Result |
+|---|---|---|---|---|
+| Windows 11 | 5.1.26100 | 4.x | ✅ MD5 verified (512KB binary w/ NUL bytes) | ✅ |
+| Fedora 43 (pwsh) | 7.4.6 | — | ✅ MD5 verified | ✅ |
+
+**Usage:**
+```powershell
+# Basic scan
+powershell.exe -ep bypass .\thunderstorm-collector-ps2.ps1 -ThunderstormServer thunderstorm.local
+
+# HTTPS
+powershell.exe -ep bypass .\thunderstorm-collector-ps2.ps1 -ThunderstormServer thunderstorm.local -UseSSL
 ```
 
-Collect files from a certain directory
+---
+
+### Batch Collector — `thunderstorm-collector.bat`
+
+A minimal `cmd.exe` script for very old Windows systems.
+
+**Use on:** Windows XP, Server 2003, Server 2008 — systems where PowerShell is unavailable or restricted. Last resort for legacy environments.
+
+| Requirement | Detail |
+|---|---|
+| Runtime | cmd.exe (Windows XP+) |
+| Upload tool | `curl.exe` (included in Windows 10 1709+; download separately for older) |
+| TLS | Not supported |
+
+**Features:**
+- Minimal dependencies — runs on virtually any Windows version
+- `FORFILES`-based recursion that skips junctions/reparse points
+- `MAX_AGE` filtering via per-file date checks (works around `FORFILES /D -N` inverted semantics)
+
+**Limitations:**
+- **Known memory leak** in the `FOR` loop for directory traversal ([details](https://stackoverflow.com/questions/6330519/memory-leak-in-batch-for-loop)). For large scans, prefer a PowerShell or Go collector.
+- No TLS, limited error handling, hardcoded configuration
+- Requires `curl.exe` to be available in `PATH`
+
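+> **Old Windows note:** The last curl version supporting Windows 7 / 2008 R2 and earlier is [v7.46.0](https://bintray.com/vszakats/generic/download_file?file_path=curl-7.46.0-win32-mingw.7z). Bintray was shut down in 2021, so this link may no longer resolve; use an archived copy of that release if needed.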
+
+**Usage:**
+```cmd
+thunderstorm-collector.bat
+```
+
+---
+
+## Harmonized CLI Flags
+
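+Collectors share a harmonized set of command-line flags where the platform allows; the table shows each collector's equivalent (— means not supported):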
+
+| Flag | Bash | Ash | Python | Perl | PS3+ | PS2 | Batch |
+|------|------|-----|--------|------|------|-----|-------|
+| `-s/--server` | ✅ | ✅ | ✅ | ✅ | `-ThunderstormServer` | ✅ | (config) |
+| `-p/--port` | ✅ | ✅ | ✅ | ✅ | `-ThunderstormPort` | ✅ | (config) |
+| `-d/--dir` | ✅ | ✅ | ✅ | ✅ | `-Folder` | ✅ | (config) |
+| `--max-age` | ✅ | ✅ | ✅ | ✅ | `-MaxAge` | ✅ | ✅ |
+| `--max-size-kb` | ✅ | ✅ | ✅ | ✅ | — | — | — |
+| `--source` | ✅ | ✅ | `-S/--source` | ✅ | `-Source` | ✅ | — |
+| `--ssl` | ✅ | ✅ | `-t/--tls` | ✅ | `-UseSSL` | ✅ | — |
+| `-k/--insecure` | ✅ | ✅ | ✅ | ✅ | — | — | — |
+| `--sync` | ✅ | ✅ | ✅ | ✅ | — | — | — |
+| `--dry-run` | ✅ | ✅ | ✅ | ✅ | — | — | — |
+| `--retries` | ✅ | ✅ | ✅ | ✅ | — | — | — |
+| `--debug` | ✅ | ✅ | ✅ | ✅ | `-Debugging` | ✅ | — |
+| `--log-file` | ✅ | ✅ | — | — | `-LogFile` | ✅ | — |
+| `--syslog` | ✅ | ✅ | — | — | — | — | — |
+| `--quiet` | ✅ | ✅ | — | — | — | — | — |
+
+**Defaults:** `--max-age 14` (days), `--max-size-kb 2048` (KB), `--retries 3`
+
+## Configuration
+
+Most collectors support basic configuration via command-line flags (the Batch collector is configured mainly by editing the script):
+
+| Parameter | Description | Default |
+|---|---|---|
+| Server | Hostname or IP of the Thunderstorm server | (required) |
+| Port | Server port | 8080 |
+| Directory | Path(s) to scan | `/` or `C:\` |
+| Max age | Only submit files modified within N days | 14 days |
+| Max size | Skip files larger than N KB | 2048 KB |
+| Source | Identifier string for audit trail | hostname |
+
+Advanced settings (skip patterns, extension filters, directory exclusions) are configured in the script source for most collectors.
+
+## Common Use Cases
+
+### Scheduled collection via cron (Linux)
 
 ```bash
-perl thunderstorm-collector.pl -- --dir /home --server thunderstorm.internal.net
+# Every 6 hours, scan /home and /tmp for files modified in the last 7 days
+0 */6 * * * bash /opt/thunderstorm-collector.sh --server ts.local --dir /home --dir /tmp --max-age 7 --quiet
 ```
 
-### Configuration
+### One-shot incident response triage
 
-Please review the configuration section in the Perl script for more settings like the maximum age, maximum file size or directory exclusions.
+```bash
+# Scan entire system, everything modified in the last 30 days
+bash thunderstorm-collector.sh --server 10.0.0.5 --dir / --max-age 30 --source "IR-case-2024-001"
+```
+
+### Windows scheduled task
 
-### Tested On
+```powershell
+schtasks /create /tn "ThunderstormCollector" /tr "powershell.exe -ep bypass C:\tools\thunderstorm-collector.ps1 -ThunderstormServer ts.local" /sc daily /st 02:00
+```
 
-Successfully tested on:
+### BusyBox / embedded system
 
-- Debian 10
\ No newline at end of file
+```sh
+# On a router or IoT device with only BusyBox
+sh /tmp/thunderstorm-collector-ash.sh --server 10.0.0.5 --dir /var --max-age 7
+```
diff --git a/scripts/tests/run_detection_tests.sh b/scripts/tests/run_detection_tests.sh
new file mode 100755
index 0000000..33eb06c
--- /dev/null
+++ b/scripts/tests/run_detection_tests.sh
@@ -0,0 +1,1107 @@
+#!/usr/bin/env bash
+# ============================================================================
+# Detection & Path Verification Tests
+#
+# Tests the full detection pipeline across all collector scripts:
+#
+# Positive tests:
+#   1. Malicious file → YARA content match (score > 0)
+#   2. Benign file → no match (no log entry)
+#   3. Benign file with malicious filename (/tmp/x) → filename IOC match
+#   4. Malicious file filtered by size → no event logged
+#   4b. Same large file without size filter → detected
+#   5. Full path preserved in thunderstorm log
+#  11. Subdirectory recursion (files found at all levels)
+#
+# Negative tests (verifying collectors DON'T do what they shouldn't):
+#   8.  Directory scope — scanning /target must NOT pick up files from /decoy
+#   9.  Age filter — files older than --max-age must NOT be submitted
+#  10.  Extension filter (PS only) — exotic extensions must NOT be submitted
+#
+# Edge cases & robustness:
+#  12.  Empty files (0 bytes) — must not crash or produce false positives
+#  13.  Unicode filenames — must not crash or corrupt path
+#  14.  Symlinks — must NOT follow symlinks (security: no directory escape)
+#  15.  Broken/dangling symlinks — must not crash
+#  16.  Special characters in filenames (spaces, parens) — must handle correctly
+#  17.  Directories named after excluded paths — must not crash
+#  18.  Unreadable files (chmod 000) — must not crash, must process other files
+#
+# Server failure & retry:
+#  19.  Server unreachable — collector must exit gracefully, not crash
+#  20.  Late server startup — retry must succeed when server comes up mid-run
+#
+# Requires: thunderstorm-stub server with YARA support (-tags yara)
+#           running on localhost with both content rules and filename IOC rules
+# ============================================================================
+
+# Strict mode: abort on command failure, on use of unset variables, and on
+# failure of any stage of a pipeline.
+set -euo pipefail
+
+# Directory holding this script, and its parent directory, from which the
+# collector scripts are launched.
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+COLLECTOR_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+# Stub server port (overridable via the environment) and the base URL built from it.
+STUB_PORT="${STUB_PORT:-18098}"
+STUB_URL="http://localhost:${STUB_PORT}"
+# Stub server state; STUB_LOG, STUB_UPLOADS and STUB_PID are assigned by start_stub.
+STUB_LOG="${STUB_LOG:-}"
+STUB_UPLOADS=""
+STUB_PID=""
+
+# Colours (ANSI escape sequences kept as literal text; printf interprets them
+# when they are embedded in a format string)
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+CYAN='\033[1;36m'
+BOLD='\033[1m'
+RESET='\033[0m'
+
+# Running totals updated by pass/fail/skip; FAILED_NAMES accumulates one
+# "  - <description>\n" entry per failed check.
+TESTS_PASSED=0
+TESTS_FAILED=0
+TESTS_SKIPPED=0
+FAILED_NAMES=""
+
+# ── Helpers ─────────────────────────────────────────────────────────────────
+
+# Print an indented informational line.
+log() {
+    printf "  %s\n" "$*"
+}
+
+# Report a passing check and bump the pass counter.
+pass() {
+    printf "  ${GREEN}PASS${RESET} %s\n" "$*"
+    TESTS_PASSED=$((TESTS_PASSED + 1))
+}
+
+# Report a failing check, bump the failure counter and remember the first
+# argument for the list of failed checks.
+fail() {
+    printf "  ${RED}FAIL${RESET} %s\n" "$*"
+    TESTS_FAILED=$((TESTS_FAILED + 1))
+    FAILED_NAMES="$FAILED_NAMES  - $1\n"
+}
+
+# Report a skipped check and bump the skip counter.
+skip() {
+    printf "  ${YELLOW}SKIP${RESET} %s\n" "$*"
+    TESTS_SKIPPED=$((TESTS_SKIPPED + 1))
+}
+
+# Find the stub server binary
+# Print the path of the first executable stub-server candidate, or an empty
+# line when none is found. Candidates, in order: $STUB_BIN_PATH, two checkout
+# locations relative to this script, then whatever 'thunderstorm-stub' is on PATH.
+find_stub() {
+    local candidates=(
+        "${STUB_BIN_PATH:-}"
+        "$SCRIPT_DIR/../../../thunderstorm-stub-server/thunderstorm-stub"
+        "$SCRIPT_DIR/../../thunderstorm-stub-server/thunderstorm-stub"
+        "$(command -v thunderstorm-stub 2>/dev/null || true)"
+    )
+    for c in "${candidates[@]}"; do
+        if [ -n "$c" ] && [ -x "$c" ]; then
+            echo "$c"
+            return
+        fi
+    done
+    echo ""
+}
+
+STUB_BIN="$(find_stub)"
+
+# Start the stub server (once for the entire test run).
+#
+# Reads:  STUB_BIN, STUB_PORT, STUB_URL, SCRIPT_DIR, STUB_RULES_DIR (optional).
+# Writes: STUB_LOG, STUB_UPLOADS, STUB_PID.
+# Exits the script with status 1 when the server process dies, never answers
+# /api/info, or reports stub mode (i.e. was built without YARA support).
+start_stub() {
+    local tmpdir; tmpdir="$(mktemp -d /tmp/detection-test-XXXXXX)"
+    STUB_LOG="$tmpdir/thunderstorm.jsonl"
+    STUB_UPLOADS="$tmpdir/uploads"
+    mkdir -p "$STUB_UPLOADS"
+
+    local rules_dir="${STUB_RULES_DIR:-$(cd "$SCRIPT_DIR/../../../thunderstorm-stub-server/rules" 2>/dev/null && pwd)}"
+
+    "$STUB_BIN" \
+        -port "$STUB_PORT" \
+        -rules-dir "$rules_dir" \
+        -log-file "$STUB_LOG" \
+        -uploads-dir "$STUB_UPLOADS" \
+        >"$tmpdir/stub.log" 2>&1 &
+    STUB_PID=$!
+
+    # Poll for readiness (up to ~10 s) instead of trusting one fixed sleep:
+    # a slow runner would otherwise be reported as "not responding".
+    local info="" attempt
+    for attempt in 1 2 3 4 5 6 7 8 9 10; do
+        sleep 1
+        if ! kill -0 "$STUB_PID" 2>/dev/null; then
+            echo "ERROR: stub server failed to start:" >&2
+            cat "$tmpdir/stub.log" >&2 || true
+            exit 1
+        fi
+        # '|| true': under 'set -e' a refused connection would otherwise abort
+        # the script right here, before any diagnostic is printed.
+        info="$(curl -s "${STUB_URL}/api/info" 2>/dev/null || true)"
+        if [ -n "$info" ]; then
+            break
+        fi
+    done
+
+    if [ -z "$info" ]; then
+        echo "ERROR: stub server not responding on port $STUB_PORT" >&2
+        kill "$STUB_PID" 2>/dev/null || true
+        exit 1
+    fi
+    # The Python snippet exits 0 only when 'stub_mode' is absent or falsy.
+    if echo "$info" | python3 -c "import sys,json; sys.exit(0 if not json.load(sys.stdin).get('stub_mode') else 1)" 2>/dev/null; then
+        return 0
+    else
+        echo "ERROR: stub server running in stub mode (no YARA). Build with -tags yara." >&2
+        kill "$STUB_PID" 2>/dev/null || true
+        exit 1
+    fi
+}
+
+# Stop the stub server recorded in STUB_PID (if any), reap it, and clear STUB_PID.
+# Always returns 0: 'wait' reports the exit status of the killed process
+# (typically non-zero after a signal), which under 'set -e' would otherwise
+# abort the caller, including the EXIT trap before it removes temp files.
+stop_stub() {
+    if [ -n "$STUB_PID" ]; then
+        kill "$STUB_PID" 2>/dev/null || true
+        wait "$STUB_PID" 2>/dev/null || true
+    fi
+    STUB_PID=""
+}
+
+# Mark the current log position so query_log only sees entries from here forward.
+# Nothing is deleted from the log: this only delegates to mark_log_position,
+# which records the current line count in LOG_OFFSET.
+clear_log() {
+    mark_log_position
+}
+
+# EXIT-trap handler: stop the main stub server, kill stray retry-test stubs
+# and delete temporary files. Every step is best-effort ('|| true') because
+# the script runs under 'set -e' and a single failing command here would
+# otherwise cut the cleanup short and leave temp directories behind.
+cleanup() {
+    stop_stub || true
+    # Kill any leftover retry-test stubs
+    local p pid
+    for p in 18101 18102 18103 18104 18105; do
+        # lsof exits non-zero when nothing uses the port (or lsof is missing)
+        # and may print several PIDs, one per line, so iterate over them.
+        for pid in $(lsof -ti ":$p" 2>/dev/null || true); do
+            kill "$pid" 2>/dev/null || true
+        done
+    done
+    rm -rf /tmp/detection-test-* /tmp/filename-ioc-test-* /tmp/retry-stub-* /tmp/collector-out-* 2>/dev/null || true
+}
+trap cleanup EXIT
+
+# Store the stub log's current line count in LOG_OFFSET so that later queries
+# can ignore everything logged before this point. Falls back to 0 when the
+# log cannot be read.
+mark_log_position() {
+    local line_count
+    if ! line_count="$(wc -l < "$STUB_LOG" 2>/dev/null)"; then
+        line_count="$(echo 0)"
+    fi
+    LOG_OFFSET="$line_count"
+}
+
+# Query the JSONL log for entries matching a client_filename substring.
+# Only searches entries AFTER the last mark_log_position() call.
+# Returns the FIRST matching JSON line (empty string if not found).
+#
+# Arguments: $1 = substring to look for in subject.client_filename.
+# Reads:     STUB_LOG (log path), LOG_OFFSET (lines to skip, default 0).
+#
+# The log path, offset and substring are handed to Python as argv instead of
+# being pasted into the program text, so quotes or backslashes in a filename
+# cannot break (or inject into) the query. A missing log and malformed lines
+# are treated as "no match" rather than aborting the search.
+query_log() {
+    local filename_substr="$1"
+    python3 -c '
+import json, sys
+
+log_path, offset, needle = sys.argv[1], int(sys.argv[2]), sys.argv[3]
+try:
+    log_file = open(log_path)
+except (IOError, OSError):
+    sys.exit(0)  # no log yet: nothing can match
+with log_file:
+    for index, line in enumerate(log_file):
+        if index < offset:
+            continue
+        line = line.strip()
+        if not line:
+            continue
+        try:
+            entry = json.loads(line)
+        except ValueError:
+            continue  # skip a corrupt or partially written line
+        subject = entry.get("subject") if isinstance(entry, dict) else None
+        name = subject.get("client_filename", "") if isinstance(subject, dict) else ""
+        if isinstance(name, str) and needle in name:
+            print(line)
+            break
+' "$STUB_LOG" "${LOG_OFFSET:-0}" "$filename_substr" 2>/dev/null
+}
+
+# Print one field of a JSON log entry.
+# Arguments: $1 = JSON text of the entry, $2 = dotted path (e.g. subject.client_filename).
+# Prints an empty line when the path cannot be followed or the value is falsy.
+log_field() {
+    local entry_json="$1"
+    local dotted_path="$2"
+    echo "$entry_json" | python3 -c "
+import json, sys
+node = json.load(sys.stdin)
+# Walk the dotted path one key at a time
+for key in '$dotted_path'.split('.'):
+    if not isinstance(node, dict):
+        node = ''
+        break
+    node = node.get(key, '')
+print(node or '')
+" 2>/dev/null
+}
+
+# Print the number of reasons attached to a log entry: the 'reason_count'
+# field when present, otherwise the length of the 'reasons' list
+# (the JSONL uses 'reasons', not 'matches').
+match_count() {
+    local entry_json="$1"
+    echo "$entry_json" | python3 -c "
+import json, sys
+entry = json.load(sys.stdin)
+fallback = len(entry.get('reasons', []))
+print(entry.get('reason_count', fallback))
+" 2>/dev/null
+}
+
+# Print the 'score' field of a log entry (0 when the field is absent).
+get_score() {
+    local entry_json="$1"
+    echo "$entry_json" | python3 -c "
+import json, sys
+entry = json.load(sys.stdin)
+sys.stdout.write(str(entry.get('score', 0)) + '\n')
+" 2>/dev/null
+}
+
+# Print 'yes' when any reason of the log entry carries the given rule name in
+# signature.rule_name, otherwise 'no'.
+# Arguments: $1 = JSON text of the entry, $2 = rule name.
+has_rule() {
+    local entry_json="$1"
+    local wanted_rule="$2"
+    echo "$entry_json" | python3 -c "
+import json, sys
+entry = json.load(sys.stdin)
+matched = any(
+    reason.get('signature', {}).get('rule_name') == '$wanted_rule'
+    for reason in entry.get('reasons', [])
+)
+print('yes' if matched else 'no')
+" 2>/dev/null
+}
+
+# ── Collector runners ───────────────────────────────────────────────────────
+
+# Run the Bash collector against directory $1 on the local stub server.
+# Remaining arguments are appended verbatim, so they can override
+# --max-age, --max-size-kb, etc. stderr is merged into stdout.
+run_bash() {
+    local scan_dir="$1"
+    shift
+    local cmd=(
+        bash "${COLLECTOR_DIR}/thunderstorm-collector.sh"
+        --server localhost --port "$STUB_PORT" --dir "$scan_dir"
+    )
+    "${cmd[@]}" "$@" 2>&1
+}
+
+# Run the Python 3 collector against directory $1 on the local stub server.
+# Remaining arguments are appended verbatim; stderr is merged into stdout.
+run_python() {
+    local scan_dir="$1"
+    shift
+    local cmd=(
+        python3 "${COLLECTOR_DIR}/thunderstorm-collector.py"
+        --server localhost --port "$STUB_PORT" --dir "$scan_dir"
+    )
+    "${cmd[@]}" "$@" 2>&1
+}
+
+# Run the Perl collector against directory $1 on the local stub server.
+# Remaining arguments are appended verbatim; stderr is merged into stdout.
+run_perl() {
+    local scan_dir="$1"
+    shift
+    local cmd=(
+        perl "${COLLECTOR_DIR}/thunderstorm-collector.pl"
+        -s localhost -p "$STUB_PORT" --dir "$scan_dir"
+    )
+    "${cmd[@]}" "$@" 2>&1
+}
+
+# Translate generic flags (--max-size-kb, --max-age) to PowerShell parameter names.
+# Arguments: $1 = name of the caller's array to append to (bound via nameref),
+#            remaining = generic arguments.
+# --max-size-kb N becomes -MaxSize N/1024 (integer division), --max-age N
+# becomes -MaxAge N; everything else is passed through unchanged.
+_translate_ps_args() {
+    local -n out_args=$1
+    shift
+    until [ $# -eq 0 ]; do
+        if [[ "$1" == "--max-size-kb" ]]; then
+            out_args+=("-MaxSize" "$(( $2 / 1024 ))")
+            shift 2
+        elif [[ "$1" == "--max-age" ]]; then
+            out_args+=("-MaxAge" "$2")
+            shift 2
+        else
+            out_args+=("$1")
+            shift
+        fi
+    done
+}
+
+# Run the PowerShell 3+ collector (via pwsh) against directory $1 on the local
+# stub server. Remaining generic flags are first mapped to PowerShell
+# parameter names; stderr is merged into stdout.
+run_ps3() {
+    local scan_dir="$1"
+    shift
+    local ps_args=()
+    _translate_ps_args ps_args "$@"
+    pwsh -NoProfile -File "${COLLECTOR_DIR}/thunderstorm-collector.ps1" \
+        -ThunderstormServer localhost \
+        -ThunderstormPort "$STUB_PORT" \
+        -Folder "$scan_dir" \
+        "${ps_args[@]}" 2>&1
+}
+
+# Run the PowerShell 2+ collector (via pwsh) against directory $1 on the local
+# stub server. Remaining generic flags are first mapped to PowerShell
+# parameter names; stderr is merged into stdout.
+run_ps2() {
+    local scan_dir="$1"
+    shift
+    local ps_args=()
+    _translate_ps_args ps_args "$@"
+    pwsh -NoProfile -File "${COLLECTOR_DIR}/thunderstorm-collector-ps2.ps1" \
+        -ThunderstormServer localhost \
+        -ThunderstormPort "$STUB_PORT" \
+        -Folder "$scan_dir" \
+        "${ps_args[@]}" 2>&1
+}
+
+# List of collectors to test; each name has a matching run_<name> function
+# dispatched by run_collector.
+COLLECTORS=("bash" "python" "perl" "ps3" "ps2")
+
+# Small delay after collector to ensure stub has written to log.
+# This is a fixed one-second pause, not a real synchronisation with the server.
+sync_stub() {
+    sleep 1
+}
+
+# Dispatch to the run_<name> function for collector $1, forwarding the
+# remaining arguments (scan directory first), then pause via sync_stub.
+# Unknown collector names run nothing but still pause.
+run_collector() {
+    local collector_name="$1"
+    shift
+    case "$collector_name" in
+        bash|python|perl|ps3|ps2)
+            "run_${collector_name}" "$@"
+            ;;
+    esac
+    sync_stub
+}
+
+# ── Test fixtures ───────────────────────────────────────────────────────────
+
+# Marker string written into "malicious" fixtures, and harmless filler text.
+MALICIOUS_CONTENT="THUNDERSTORM_TEST_MATCH_STRING"
+BENIGN_CONTENT="completely harmless content"
+
+# Create per-collector fixture directories with uniquely named files.
+# Argument: $1 = collector name (embedded in every file name).
+# Prints the path of the new fixture root, which contains:
+#   malicious/evil-<name>.exe  - marker string only
+#   benign/clean-<name>.txt    - harmless text
+#   large/big-<name>.tmp       - 3072 KiB of 'A' followed by the marker string
+setup_collector_fixtures() {
+    local collector="$1"
+    local root; root="$(mktemp -d /tmp/detection-test-XXXXXX)"
+
+    mkdir -p "$root/malicious" "$root/benign" "$root/large"
+
+    echo "$MALICIOUS_CONTENT" > "$root/malicious/evil-${collector}.exe"
+    echo "$BENIGN_CONTENT" > "$root/benign/clean-${collector}.txt"
+
+    local big_file="$root/large/big-${collector}.tmp"
+    {
+        dd if=/dev/zero bs=1024 count=3072 2>/dev/null | tr '\0' 'A'
+        echo "$MALICIOUS_CONTENT"
+    } > "$big_file"
+
+    echo "$root"
+}
+
+# ============================================================================
+# TEST CASES
+# ============================================================================
+
+# ── 1. Malicious file detected ─────────────────────────────────────────────
+# Arguments: $1 = collector name, $2 = fixture root from setup_collector_fixtures.
+# Scans <fixtures>/malicious and records up to two results: the logged score
+# must be > 0, and the rule named TestRule must be among the reasons.
+test_malicious_detected() {
+    local collector="$1"
+    local fixtures="$2"
+    local sample="evil-${collector}.exe"
+    clear_log
+
+    run_collector "$collector" "$fixtures/malicious" --max-age 30 >/dev/null 2>&1 || true
+
+    local log_entry; log_entry="$(query_log "$sample")"
+    if [ -z "$log_entry" ]; then
+        fail "$collector/malicious-detected: no log entry for $sample"
+        return
+    fi
+
+    local score; score="$(get_score "$log_entry")"
+    if ! [ "$score" -gt 0 ] 2>/dev/null; then
+        fail "$collector/malicious-detected: expected score > 0, got $score"
+    else
+        pass "$collector/malicious-detected: score=$score"
+    fi
+
+    local rule_hit; rule_hit="$(has_rule "$log_entry" "TestRule")"
+    if [ "$rule_hit" != "yes" ]; then
+        fail "$collector/malicious-rule: TestRule not found in matches"
+    else
+        pass "$collector/malicious-rule: TestRule matched"
+    fi
+}
+
+# ── 2. Benign file — no match ──────────────────────────────────────────────
+# Arguments: $1 = collector name, $2 = fixture root from setup_collector_fixtures.
+# Scans <fixtures>/benign. Passes when the benign file has no log entry at
+# all, or has an entry whose reason count is 0.
+test_benign_no_match() {
+    local collector="$1"
+    local fixtures="$2"
+    clear_log
+
+    run_collector "$collector" "$fixtures/benign" --max-age 30 >/dev/null 2>&1 || true
+
+    local log_entry; log_entry="$(query_log "clean-${collector}.txt")"
+    if [ -n "$log_entry" ]; then
+        # An entry exists: it must carry zero reasons
+        local reasons; reasons="$(match_count "$log_entry")"
+        if [ "$reasons" -eq 0 ] 2>/dev/null; then
+            pass "$collector/benign-no-match: 0 matches"
+        else
+            fail "$collector/benign-no-match: expected 0 matches, got $reasons"
+        fi
+        return
+    fi
+
+    # No entry at all is the expected outcome for a file without findings
+    pass "$collector/benign-no-match: 0 matches (no log entry = benign)"
+}
+
+# ── 3. Filename IOC match (/tmp/x) ─────────────────────────────────────────
+# Arguments: $1 = collector name, $2 = fixture root (unused here; accepted so
+# this test takes the same arguments as the other per-collector tests).
+test_filename_ioc() {
+    local collector="$1"
+    local fixtures="$2"
+    clear_log
+
+    # Create a scratch directory /tmp/filename-ioc-test-<pid> holding a single
+    # benign file named 'testfile'. Scanning it with the collector only proves
+    # that a plain upload from this directory works. The filename IOC rule
+    # matches /tmp/<single-char> paths, so the rule itself is exercised by a
+    # direct curl upload that reports the client filename as exactly "/tmp/x".
+    local ioc_dir="/tmp/filename-ioc-test-$$"
+    mkdir -p "$ioc_dir"
+    echo "$BENIGN_CONTENT" > "$ioc_dir/testfile"
+
+    # First: submit via the collector to verify the upload works
+    run_collector "$collector" "$ioc_dir" --max-age 30 >/dev/null 2>&1 || true
+
+    # Second: submit the same file directly with filename="/tmp/x" via curl
+    # This is what matters — the full path must trigger the rule.
+    # '|| true': a failed upload must show up as a FAIL below instead of
+    # aborting the whole suite through 'set -e'.
+    curl -s -X POST "${STUB_URL}/api/check?source=filename-ioc-$collector" \
+        -F "file=@${ioc_dir}/testfile;filename=/tmp/x" >/dev/null 2>&1 || true
+
+    local entry; entry="$(query_log "/tmp/x")"
+    if [ -z "$entry" ]; then
+        fail "$collector/filename-ioc: no log entry containing /tmp/x"
+        rm -rf "$ioc_dir"
+        return
+    fi
+
+    local has_ioc; has_ioc="$(has_rule "$entry" "FilenameIOC_Tmp_SingleChar")"
+    if [ "$has_ioc" = "yes" ]; then
+        pass "$collector/filename-ioc: FilenameIOC_Tmp_SingleChar matched on /tmp/x"
+    else
+        fail "$collector/filename-ioc: FilenameIOC_Tmp_SingleChar not found for /tmp/x"
+    fi
+
+    rm -rf "$ioc_dir"
+}
+
+# ── 4. Large malicious file filtered by size → no event ─────────────────────
+# Arguments: $1 = collector name, $2 = fixture root from setup_collector_fixtures.
+# Scans <fixtures>/large with a 1024 KB size limit and passes only when the
+# ~3 MB file left no log entry.
+test_size_filter_no_event() {
+    local collector="$1"
+    local fixtures="$2"
+    local sample="big-${collector}.tmp"
+    clear_log
+
+    # 1024 KB (1 MB) limit, well below the size of the large fixture
+    run_collector "$collector" "$fixtures/large" --max-age 30 --max-size-kb 1024 >/dev/null 2>&1 || true
+
+    local log_entry; log_entry="$(query_log "$sample")"
+    if [ -n "$log_entry" ]; then
+        fail "$collector/size-filter-no-event: $sample should not appear in log (was uploaded despite size filter)"
+    else
+        pass "$collector/size-filter-no-event: $sample correctly filtered (no log entry)"
+    fi
+}
+
+# ── 4b. Same large malicious file without size filter → detected ────────────
+# Arguments: $1 = collector name, $2 = fixture root from setup_collector_fixtures.
+# Scans <fixtures>/large with a 4096 KB size limit so the ~3 MB file is let
+# through, and passes when its log entry has a score > 0.
+test_large_malicious_detected() {
+    local collector="$1"
+    local fixtures="$2"
+    local sample="big-${collector}.tmp"
+    clear_log
+
+    # Raise the size limit above the fixture size
+    run_collector "$collector" "$fixtures/large" --max-age 30 --max-size-kb 4096 >/dev/null 2>&1 || true
+
+    local log_entry; log_entry="$(query_log "$sample")"
+    if [ -z "$log_entry" ]; then
+        fail "$collector/large-malicious-detected: no log entry for $sample"
+        return
+    fi
+
+    local score; score="$(get_score "$log_entry")"
+    if ! [ "$score" -gt 0 ] 2>/dev/null; then
+        fail "$collector/large-malicious-detected: expected score > 0, got $score"
+    else
+        pass "$collector/large-malicious-detected: score=$score (detected without size filter)"
+    fi
+}
+
+# ── 5. Full path preserved in log ──────────────────────────────────────────
+# Arguments: $1 = collector name, $2 = fixture root from setup_collector_fixtures.
+# Scans <fixtures>/malicious and passes when the logged subject.client_filename
+# ends in /malicious/evil-<collector>.exe, i.e. carries the path and not just
+# the basename.
+test_full_path_in_log() {
+    local collector="$1"
+    local fixtures="$2"
+    clear_log
+
+    run_collector "$collector" "$fixtures/malicious" --max-age 30 >/dev/null 2>&1 || true
+
+    local entry; entry="$(query_log "evil-${collector}.exe")"
+    if [ -z "$entry" ]; then
+        fail "$collector/full-path: no log entry for evil-${collector}.exe"
+        return
+    fi
+
+    local cf; cf="$(log_field "$entry" "subject.client_filename")"
+
+    # Must contain the full path, not just the basename.
+    # A quoted 'case' pattern compares the suffix literally; a grep regex would
+    # let '.' match any character and adds a pipeline that can fail under pipefail.
+    case "$cf" in
+        *"/malicious/evil-${collector}.exe")
+            pass "$collector/full-path: client_filename=$cf"
+            ;;
+        *)
+            fail "$collector/full-path: expected full path ending in /malicious/evil-${collector}.exe, got '$cf'"
+            ;;
+    esac
+}
+
+# ── 8. Directory scope — only scans target directory ────────────────────────
+# Verifies that scanning /target does NOT pick up files from /decoy
+# Argument: $1 = collector name.
+# Builds sibling directories 'target' and 'decoy', scans only 'target', and
+# passes when the target file is logged and the decoy file is not.
+test_directory_scope() {
+    local collector="$1"
+    clear_log
+    local sandbox; sandbox="$(mktemp -d /tmp/detection-test-XXXXXX)"
+    local wanted="in-scope-${collector}.exe"
+    local unwanted="out-of-scope-${collector}.exe"
+
+    # Two sibling directories, one marker file in each
+    mkdir -p "$sandbox/target" "$sandbox/decoy"
+    echo "$MALICIOUS_CONTENT" > "$sandbox/target/$wanted"
+    echo "$MALICIOUS_CONTENT" > "$sandbox/decoy/$unwanted"
+
+    # Only the target directory is handed to the collector
+    run_collector "$collector" "$sandbox/target" --max-age 30 >/dev/null 2>&1 || true
+
+    # The in-scope file has to show up in the log
+    local in_entry; in_entry="$(query_log "$wanted")"
+    if [ -z "$in_entry" ]; then
+        fail "$collector/dir-scope: in-scope file not found in log"
+        rm -rf "$sandbox"
+        return
+    fi
+
+    # The decoy file must be absent from the log
+    local out_entry; out_entry="$(query_log "$unwanted")"
+    if [ -z "$out_entry" ]; then
+        pass "$collector/dir-scope: only target directory scanned"
+    else
+        fail "$collector/dir-scope: out-of-scope file WAS submitted (directory escape!)"
+    fi
+
+    rm -rf "$sandbox"
+}
+
+# ── 9. Age filter — old files must not be collected ─────────────────────────
+# Creates a recent file and an old file (backdated via touch -t),
+# scans with --max-age 1, and verifies only the recent file is submitted.
+# Argument: $1 = collector name.
+# Creates one fresh file and one file backdated by 60 days, scans with
+# --max-age 1, and passes when only the fresh file is logged. The test is
+# skipped when backdating did not take effect.
+test_age_filter() {
+    local collector="$1"
+    clear_log
+    local fixtures; fixtures="$(mktemp -d /tmp/detection-test-XXXXXX)"
+
+    mkdir -p "$fixtures/aged"
+
+    # Recent file (now) — should be submitted
+    echo "$MALICIOUS_CONTENT" > "$fixtures/aged/recent-${collector}.exe"
+
+    # Old file (60 days ago) — should NOT be submitted with --max-age 1
+    echo "$MALICIOUS_CONTENT" > "$fixtures/aged/old-${collector}.exe"
+    # 'date -d' and 'stat -c' are GNU extensions. Failures are tolerated here so
+    # that, under 'set -e', the skip branch below is reached instead of the
+    # whole suite aborting.
+    touch -t "$(date -d '60 days ago' '+%Y%m%d%H%M.%S' 2>/dev/null)" \
+        "$fixtures/aged/old-${collector}.exe" 2>/dev/null || true
+
+    # Verify the timestomping worked (an unreadable mtime counts as age 0)
+    local now; now="$(date +%s)"
+    local old_mtime; old_mtime="$(stat -c %Y "$fixtures/aged/old-${collector}.exe" 2>/dev/null || echo "$now")"
+    local age_days=$(( (now - old_mtime) / 86400 ))
+    if [ "$age_days" -lt 30 ]; then
+        skip "$collector/age-filter: timestomping failed (age=$age_days days, expected >= 60)"
+        rm -rf "$fixtures"
+        return
+    fi
+
+    # Scan with --max-age 1 (only files modified in the last day)
+    run_collector "$collector" "$fixtures/aged" --max-age 1 >/dev/null 2>&1 || true
+
+    # Recent file MUST be in the log
+    local recent_entry; recent_entry="$(query_log "recent-${collector}.exe")"
+    if [ -z "$recent_entry" ]; then
+        fail "$collector/age-filter: recent file not found in log"
+        rm -rf "$fixtures"
+        return
+    fi
+
+    # Old file MUST NOT be in the log
+    local old_entry; old_entry="$(query_log "old-${collector}.exe")"
+    if [ -n "$old_entry" ]; then
+        fail "$collector/age-filter: old file (60d ago) WAS submitted despite --max-age 1"
+    else
+        pass "$collector/age-filter: old file correctly skipped (--max-age 1)"
+    fi
+
+    rm -rf "$fixtures"
+}
+
+# ── 10. Extension filter (PS only) — unknown extensions not submitted ───────
+# PowerShell collectors have a default extension whitelist.
+# Files with exotic extensions (.xyz) should NOT be submitted.
+# Argument: $1 = collector name.
+# Runs only for ps3/ps2 (skipped for every other collector). Creates a .exe
+# and a .xyz file and passes when the .exe is logged and the .xyz is not.
+test_extension_filter() {
+    local collector="$1"
+    clear_log
+
+    # Every collector other than the two PowerShell ones is skipped
+    if [ "$collector" != "ps3" ] && [ "$collector" != "ps2" ]; then
+        skip "$collector/ext-filter: N/A (no extension filter)"
+        return
+    fi
+
+    local sandbox; sandbox="$(mktemp -d /tmp/detection-test-XXXXXX)"
+    local scan_dir="$sandbox/exttest"
+    mkdir -p "$scan_dir"
+
+    # One file that should be submitted, one that should be filtered out
+    echo "$MALICIOUS_CONTENT" > "$scan_dir/known-${collector}.exe"
+    echo "$MALICIOUS_CONTENT" > "$scan_dir/exotic-${collector}.xyz"
+
+    run_collector "$collector" "$scan_dir" --max-age 30 >/dev/null 2>&1 || true
+
+    # The .exe file has to be present in the log
+    local known_entry; known_entry="$(query_log "known-${collector}.exe")"
+    if [ -z "$known_entry" ]; then
+        fail "$collector/ext-filter: .exe file not found in log"
+        rm -rf "$sandbox"
+        return
+    fi
+
+    # The .xyz file has to be absent from the log
+    local exotic_entry; exotic_entry="$(query_log "exotic-${collector}.xyz")"
+    if [ -z "$exotic_entry" ]; then
+        pass "$collector/ext-filter: .xyz file correctly filtered"
+    else
+        fail "$collector/ext-filter: .xyz file WAS submitted (should be filtered by extension)"
+    fi
+
+    rm -rf "$sandbox"
+}
+
+# ── 11. Subdirectory recursion — files in subdirectories are found ──────────
+# Verifies that the collector descends into subdirectories.
+# Argument: $1 = collector name.
+# Places one file at each of three nesting levels, scans the top directory,
+# and passes when all three files are logged; otherwise lists the missing levels.
+test_subdirectory_recursion() {
+    local collector="$1"
+    clear_log
+    local sandbox; sandbox="$(mktemp -d /tmp/detection-test-XXXXXX)"
+    local root="$sandbox/root"
+
+    mkdir -p "$root/sub1/sub2"
+    echo "$MALICIOUS_CONTENT" > "$root/top-${collector}.exe"
+    echo "$MALICIOUS_CONTENT" > "$root/sub1/mid-${collector}.exe"
+    echo "$MALICIOUS_CONTENT" > "$root/sub1/sub2/deep-${collector}.exe"
+
+    run_collector "$collector" "$root" --max-age 30 >/dev/null 2>&1 || true
+
+    local top; top="$(query_log "top-${collector}.exe")"
+    local mid; mid="$(query_log "mid-${collector}.exe")"
+    local deep; deep="$(query_log "deep-${collector}.exe")"
+
+    # Collect the levels whose file never reached the log
+    local missing=""
+    [ -n "$top" ] || missing="$missing top"
+    [ -n "$mid" ] || missing="$missing mid"
+    [ -n "$deep" ] || missing="$missing deep"
+
+    if [ -z "$missing" ]; then
+        pass "$collector/subdir-recursion: files found at all 3 levels"
+    else
+        fail "$collector/subdir-recursion: missing files:$missing"
+    fi
+
+    rm -rf "$sandbox"
+}
+
+# ── 12. Empty files — should be submitted but produce no YARA match ─────────
+# A 0-byte sample may be skipped or submitted; both count as a pass. The only
+# failing outcome is a log entry on which the content rule "TestRule" matched.
+test_empty_file() {
+    local collector="$1"
+    clear_log
+    local workdir; workdir="$(mktemp -d /tmp/detection-test-XXXXXX)"
+    local sample="empty-${collector}.exe"
+
+    mkdir -p "$workdir/empty"
+    : > "$workdir/empty/$sample"   # created by truncation, so exactly 0 bytes
+
+    run_collector "$collector" "$workdir/empty" --max-age 30 >/dev/null 2>&1 || true
+
+    local entry; entry="$(query_log "$sample")"
+    if [ -z "$entry" ]; then
+        # Nothing in the log: the collector did not upload the empty file.
+        pass "$collector/empty-file: empty file skipped (acceptable behavior)"
+    else
+        # A score above zero is tolerated here: filename IOC rules (e.g. a path
+        # under /tmp) may fire without any content match. Only TestRule, which
+        # is content-based, would indicate a false positive.
+        local score; score="$(get_score "$entry")"
+        local content_hit; content_hit="$(has_rule "$entry" "TestRule")"
+        if [ "$content_hit" != "yes" ]; then
+            pass "$collector/empty-file: submitted, score=$score (no content match)"
+        else
+            fail "$collector/empty-file: TestRule matched empty file (content false positive!)"
+        fi
+    fi
+
+    rm -rf "$workdir"
+}
+
+# ── 13. Unicode filenames — must not crash or corrupt the path ──────────────
+# Submits a sample whose name contains a non-ASCII character. A logged entry
+# passes regardless of score; a missing entry is recorded as a skip.
+test_unicode_filename() {
+    local collector="$1"
+    clear_log
+    local workdir; workdir="$(mktemp -d /tmp/detection-test-XXXXXX)"
+    local sample="données-${collector}.exe"
+
+    mkdir -p "$workdir/unicode"
+    echo "$MALICIOUS_CONTENT" > "$workdir/unicode/$sample"
+
+    run_collector "$collector" "$workdir/unicode" --max-age 30 >/dev/null 2>&1 || true
+
+    local entry; entry="$(query_log "$sample")"
+    if [ -z "$entry" ]; then
+        # Some collectors may not handle Unicode — acceptable to skip
+        skip "$collector/unicode-filename: file not submitted (Unicode handling varies)"
+    else
+        local score; score="$(get_score "$entry")"
+        # Only the wording differs between a scored and an unscored entry;
+        # a non-numeric score falls through to the "submitted" wording.
+        local outcome="submitted (score=$score)"
+        if [ "$score" -gt 0 ] 2>/dev/null; then
+            outcome="detected with score=$score"
+        fi
+        pass "$collector/unicode-filename: $outcome"
+    fi
+
+    rm -rf "$workdir"
+}
+
+# ── 14. Symlinks — must NOT follow symlinks (security) ──────────────────────
+# The scan directory holds one regular file (control) and one symlink whose
+# target lives outside the scan directory. The control must be logged; neither
+# the link name nor the target name may appear in the log.
+test_symlink_not_followed() {
+    local collector="$1"
+    clear_log
+    local workdir; workdir="$(mktemp -d /tmp/detection-test-XXXXXX)"
+    local scan_dir="$workdir/scandir"
+    local outside_file="$workdir/outside/secret-${collector}.exe"
+
+    mkdir -p "$scan_dir" "$workdir/outside"
+    echo "$MALICIOUS_CONTENT" > "$outside_file"
+
+    # Regular file inside the scan dir: proves the collector actually ran
+    echo "$MALICIOUS_CONTENT" > "$scan_dir/real-${collector}.exe"
+
+    # Symlink inside the scan dir that escapes to the outside file
+    ln -s "$outside_file" "$scan_dir/link-${collector}.exe"
+
+    run_collector "$collector" "$scan_dir" --max-age 30 >/dev/null 2>&1 || true
+
+    # Without the control entry the symlink verdict would be meaningless
+    local control; control="$(query_log "real-${collector}.exe")"
+    if [ -z "$control" ]; then
+        fail "$collector/symlink: real file not found in log"
+        rm -rf "$workdir"
+        return
+    fi
+
+    # The upload could be logged under the target's name or the link's name
+    local by_target; by_target="$(query_log "secret-${collector}.exe")"
+    local by_link; by_link="$(query_log "link-${collector}.exe")"
+    if [ -z "$by_target" ] && [ -z "$by_link" ]; then
+        pass "$collector/symlink: symlinks correctly skipped"
+    else
+        fail "$collector/symlink: symlinked file WAS followed (security risk!)"
+    fi
+
+    rm -rf "$workdir"
+}
+
+# ── 15. Broken symlinks — must not crash ────────────────────────────────────
+# Places a dangling symlink next to a regular sample. The regular sample being
+# logged is taken as evidence that the collector got past the broken link.
+test_broken_symlink() {
+    local collector="$1"
+    clear_log
+    local workdir; workdir="$(mktemp -d /tmp/detection-test-XXXXXX)"
+    local scan_dir="$workdir/broken"
+
+    mkdir -p "$scan_dir"
+    echo "$MALICIOUS_CONTENT" > "$scan_dir/real-${collector}.exe"
+
+    # Link whose target does not exist
+    ln -s "/nonexistent/file-${collector}.exe" "$scan_dir/dangling-${collector}.exe"
+
+    # Exit status is ignored; only the log decides
+    run_collector "$collector" "$scan_dir" --max-age 30 >/dev/null 2>&1 || true
+
+    local survivor; survivor="$(query_log "real-${collector}.exe")"
+    if [ -z "$survivor" ]; then
+        fail "$collector/broken-symlink: real file not found (collector may have crashed)"
+    else
+        pass "$collector/broken-symlink: collector survived dangling symlink"
+    fi
+
+    rm -rf "$workdir"
+}
+
+# ── 16. Special characters in filenames ─────────────────────────────────────
+# Spaces, quotes, and other shell-sensitive characters must not break the collector.
+test_special_chars_filename() {
+    local collector="$1"
+    clear_log
+    local fixtures; fixtures="$(mktemp -d /tmp/detection-test-XXXXXX)"
+
+    mkdir -p "$fixtures/special"
+    # File with spaces
+    echo "$MALICIOUS_CONTENT" > "$fixtures/special/has spaces-${collector}.exe"
+    # File with parentheses
+    echo "$MALICIOUS_CONTENT" > "$fixtures/special/parens(1)-${collector}.exe"
+
+    run_collector "$collector" "$fixtures/special" --max-age 30 >/dev/null 2>&1 || true
+
+    local space_entry; space_entry="$(query_log "has spaces-${collector}.exe")"
+    local paren_entry; paren_entry="$(query_log "parens(1)-${collector}.exe")"
+
+    local found=0
+    [ -n "$space_entry" ] && found=$((found + 1))
+    [ -n "$paren_entry" ] && found=$((found + 1))
+
+    if [ "$found" -eq 2 ]; then
+        pass "$collector/special-chars: spaces and parens handled ($found/2 found)"
+    elif [ "$found" -gt 0 ]; then
+        pass "$collector/special-chars: partial handling ($found/2 found)"
+    else
+        fail "$collector/special-chars: no files with special chars submitted"
+    fi
+
+    rm -rf "$fixtures"
+}
+
+# ── 17. Hard folder exclusions — /proc, /sys, /dev must be skipped ──────────
+# The real /proc, /sys and /dev cannot be scanned from a test, so this builds
+# a tree containing directories that merely share those names. Whether files
+# inside them are submitted is deliberately NOT asserted: hard exclusions are
+# typically absolute-path prefixes, which such a tree does not trigger. The
+# check is only that a sample in an ordinary sibling directory still arrives.
+test_excluded_dirs_survive() {
+    local collector="$1"
+    clear_log
+    local workdir; workdir="$(mktemp -d /tmp/detection-test-XXXXXX)"
+    local scan_root="$workdir/scanme"
+
+    # Two directories named like excluded paths plus one ordinary directory
+    mkdir -p "$scan_root/proc" "$scan_root/dev" "$scan_root/normal"
+    echo "$MALICIOUS_CONTENT" > "$scan_root/proc/inside-proc-${collector}.exe"
+    echo "$MALICIOUS_CONTENT" > "$scan_root/dev/inside-dev-${collector}.exe"
+    echo "$MALICIOUS_CONTENT" > "$scan_root/normal/legit-${collector}.exe"
+
+    run_collector "$collector" "$scan_root" --max-age 30 >/dev/null 2>&1 || true
+
+    # The ordinary file is the proof that the collector ran to completion
+    local legit; legit="$(query_log "legit-${collector}.exe")"
+    if [ -n "$legit" ]; then
+        pass "$collector/excluded-dirs: collector survived dirs named proc/dev"
+    else
+        fail "$collector/excluded-dirs: legit file not found (collector may have crashed)"
+    fi
+
+    rm -rf "$workdir"
+}
+
+# ── 18. No-permission files — must not crash ───────────────────────────────
+# Puts a mode-000 sample next to a normal one. The normal sample being logged
+# is taken as evidence that the collector got past the unreadable file.
+test_unreadable_file() {
+    local collector="$1"
+    clear_log
+    local workdir; workdir="$(mktemp -d /tmp/detection-test-XXXXXX)"
+    local scan_dir="$workdir/perms"
+    local locked="$scan_dir/unreadable-${collector}.exe"
+
+    mkdir -p "$scan_dir"
+    echo "$MALICIOUS_CONTENT" > "$scan_dir/readable-${collector}.exe"
+    echo "$MALICIOUS_CONTENT" > "$locked"
+    chmod 000 "$locked"
+
+    run_collector "$collector" "$scan_dir" --max-age 30 >/dev/null 2>&1 || true
+
+    local survivor; survivor="$(query_log "readable-${collector}.exe")"
+    if [ -z "$survivor" ]; then
+        fail "$collector/unreadable-file: readable file not found (collector may have crashed)"
+    else
+        pass "$collector/unreadable-file: collector survived unreadable file"
+    fi
+
+    # Give the permissions back before removing the tree
+    chmod 644 "$locked" 2>/dev/null
+    rm -rf "$workdir"
+}
+
+# ── 19. Server unavailable then recovery — retry must succeed ───────────────
+# Start the collector against a dead port, then start the stub mid-run.
+# The collector should retry and eventually succeed.
+test_retry_on_late_server() {
+    local collector="$1"
+    clear_log
+
+    local fixtures; fixtures="$(mktemp -d /tmp/detection-test-XXXXXX)"
+    mkdir -p "$fixtures/retry"
+    echo "$MALICIOUS_CONTENT" > "$fixtures/retry/retry-${collector}.exe"
+
+    # Use a unique port per collector so concurrent cleanup doesn't conflict
+    local retry_port
+    case "$collector" in
+        bash)   retry_port=18101 ;;
+        python) retry_port=18102 ;;
+        perl)   retry_port=18103 ;;
+        ps3)    retry_port=18104 ;;
+        ps2)    retry_port=18105 ;;
+    esac
+    local retry_log; retry_log="$(mktemp /tmp/retry-stub-XXXXXX.jsonl)"
+
+    # Start the collector against the dead port (it will retry)
+    local collector_out; collector_out="$(mktemp /tmp/collector-out-XXXXXX.txt)"
+
+    # Start the stub server FIRST on the retry port, but with a delayed start.
+    # We use a wrapper that waits 2 seconds before launching the stub.
+    local stub_bin="${STUB_BIN:-/home/neo/.openclaw/workspace/projects/thunderstorm-stub-server/thunderstorm-stub}"
+    local stub_rules="${STUB_RULES_DIR:-/home/neo/.openclaw/workspace/projects/thunderstorm-stub-server/rules}"
+
+    # Launch delayed stub in background.
+    # All collectors send a begin marker with a single retry after 2s on failure.
+    # Connection refused is instant, so: attempt 1 at ~0s, sleep 2s, attempt 2 at ~2s.
+    # The stub takes ~0.5-1s to load YARA rules and bind, so we must start it
+    # early enough that it's listening before the 2nd begin marker attempt.
+    # Starting at 0.3s gives the stub ~1.7s to initialize before t=2s.
+    ( sleep 0.3 && "$stub_bin" -port "$retry_port" -rules-dir "$stub_rules" -log-file "$retry_log" ) \
+        > /dev/null 2>&1 &
+    local stub_pid=$!
+
+    # Run the collector synchronously — it will fail first, then succeed on retry.
+    # --retries 5 gives enough attempts for the stub to come up after 2s delay.
+    case "$collector" in
+        bash)
+            timeout 30 bash "${COLLECTOR_DIR}/thunderstorm-collector.sh" \
+                --server localhost --port "$retry_port" --dir "$fixtures/retry" \
+                --max-age 30 --retries 5 > "$collector_out" 2>&1 || true
+            ;;
+        python)
+            timeout 30 python3 "${COLLECTOR_DIR}/thunderstorm-collector.py" \
+                --server localhost --port "$retry_port" --dir "$fixtures/retry" \
+                --max-age 30 --retries 5 > "$collector_out" 2>&1 || true
+            ;;
+        perl)
+            timeout 30 perl "${COLLECTOR_DIR}/thunderstorm-collector.pl" \
+                -s localhost -p "$retry_port" --dir "$fixtures/retry" \
+                --max-age 30 --retries 5 > "$collector_out" 2>&1 || true
+            ;;
+        ps3)
+            timeout 30 pwsh -NoProfile -File "${COLLECTOR_DIR}/thunderstorm-collector.ps1" \
+                -ThunderstormServer localhost -ThunderstormPort "$retry_port" -Folder "$fixtures/retry" \
+                -MaxAge 30 > "$collector_out" 2>&1 || true
+            ;;
+        ps2)
+            timeout 30 pwsh -NoProfile -File "${COLLECTOR_DIR}/thunderstorm-collector-ps2.ps1" \
+                -ThunderstormServer localhost -ThunderstormPort "$retry_port" -Folder "$fixtures/retry" \
+                -MaxAge 30 > "$collector_out" 2>&1 || true
+            ;;
+    esac
+
+    # Check if the file was eventually submitted
+    local entry=""
+    if [ -f "$retry_log" ]; then
+        entry="$(python3 -c "
+import json, sys
+for line in open('$retry_log'):
+    line = line.strip()
+    if not line: continue
+    d = json.loads(line)
+    cf = d.get('subject', {}).get('client_filename', '')
+    if 'retry-${collector}' in cf:
+        print(line)
+        break
+" 2>/dev/null)"
+    fi
+
+    if [ -n "$entry" ]; then
+        local score; score="$(get_score "$entry")"
+        pass "$collector/retry-recovery: file submitted after server came up (score=$score)"
+    else
+        # Check if the collector even attempted retries
+        if grep -qi 'retry\|attempt\|retrying\|failed.*attempt' "$collector_out" 2>/dev/null; then
+            fail "$collector/retry-recovery: retried but file never submitted"
+        else
+            fail "$collector/retry-recovery: no retry attempt detected"
+        fi
+    fi
+
+    # Cleanup: kill the delayed stub
+    kill "$stub_pid" 2>/dev/null
+    wait "$stub_pid" 2>/dev/null || true
+    rm -rf "$fixtures" "$retry_log" "$collector_out"
+}
+
+# ── 20. Server returns errors — collector must not crash ────────────────────
+# Runs the collector against a port with no listener (connection refused) and
+# checks that it terminates by itself and leaves no crash signature in its
+# combined stdout/stderr.
+test_server_unreachable() {
+    local collector="$1"
+    clear_log
+
+    local workdir; workdir="$(mktemp -d /tmp/detection-test-XXXXXX)"
+    local scan_dir="$workdir/unreachable"
+    mkdir -p "$scan_dir"
+    echo "$MALICIOUS_CONTENT" > "$scan_dir/orphan-${collector}.exe"
+
+    # Port 18099 has nothing listening — all uploads will fail
+    local dead_port=18099
+    local collector_out; collector_out="$(mktemp /tmp/collector-out-XXXXXX.txt)"
+
+    # Assemble the per-collector command line. Retries are kept minimal where
+    # the collector takes such an option, to avoid a long wait.
+    local -a cmd=()
+    case "$collector" in
+        bash)
+            cmd=(bash "${COLLECTOR_DIR}/thunderstorm-collector.sh"
+                 --server localhost --port "$dead_port" --dir "$scan_dir"
+                 --max-age 30 --retries 1)
+            ;;
+        python)
+            cmd=(python3 "${COLLECTOR_DIR}/thunderstorm-collector.py"
+                 --server localhost --port "$dead_port" --dir "$scan_dir"
+                 --max-age 30 --retries 1)
+            ;;
+        perl)
+            cmd=(perl "${COLLECTOR_DIR}/thunderstorm-collector.pl"
+                 -s localhost -p "$dead_port" --dir "$scan_dir"
+                 --max-age 30 --retries 1)
+            ;;
+        ps3)
+            cmd=(pwsh -NoProfile -File "${COLLECTOR_DIR}/thunderstorm-collector.ps1"
+                 -ThunderstormServer localhost -ThunderstormPort "$dead_port" -Folder "$scan_dir"
+                 -MaxAge 30)
+            ;;
+        ps2)
+            cmd=(pwsh -NoProfile -File "${COLLECTOR_DIR}/thunderstorm-collector-ps2.ps1"
+                 -ThunderstormServer localhost -ThunderstormPort "$dead_port" -Folder "$scan_dir"
+                 -MaxAge 30)
+            ;;
+    esac
+
+    # timeout kills a collector that hangs; the status is captured instead of
+    # letting a non-zero exit abort the test run.
+    local exit_code=0
+    if [ "${#cmd[@]}" -gt 0 ]; then
+        timeout 20 "${cmd[@]}" > "$collector_out" 2>&1 || exit_code=$?
+    fi
+
+    # Status 124 is what timeout reports when it had to kill the command.
+    if [ "$exit_code" -eq 124 ]; then
+        fail "$collector/server-unreachable: collector hung (killed by timeout)"
+    elif grep -qi 'traceback\|panic\|segfault\|core dump' "$collector_out" 2>/dev/null; then
+        fail "$collector/server-unreachable: collector crashed"
+    elif grep -qi 'fail\|error\|could not\|unable\|refused' "$collector_out" 2>/dev/null; then
+        # The collector said something about the failure
+        pass "$collector/server-unreachable: exited gracefully with error message"
+    else
+        pass "$collector/server-unreachable: exited without crash (exit=$exit_code)"
+    fi
+
+    rm -rf "$workdir" "$collector_out"
+}
+
+# ============================================================================
+# MAIN
+# ============================================================================
+
+LOG_OFFSET=0
+
+echo ""
+echo "${BOLD}Detection & Path Verification Tests${RESET}"
+echo "============================================"
+
+# Reuse a stub that is already answering on STUB_URL when STUB_LOG is set;
+# otherwise a stub binary is required and a private instance is started.
+if [ -n "$STUB_LOG" ] && curl -s "${STUB_URL}/api/info" >/dev/null 2>&1; then
+    echo "Using external stub server on port $STUB_PORT (log=$STUB_LOG)"
+else
+    # Pre-flight checks
+    if [ -z "$STUB_BIN" ]; then
+        echo "ERROR: thunderstorm-stub binary not found." >&2
+        echo "Set STUB_BIN_PATH or build with: go build -tags yara -o thunderstorm-stub ." >&2
+        exit 1
+    fi
+
+    # Start the stub server
+    start_stub
+    echo "Stub server: pid=$STUB_PID log=$STUB_LOG"
+fi
+
+# A collector is exercised only when its interpreter is on PATH; pwsh enables
+# both PowerShell collector variants.
+available_collectors=()
+command -v bash >/dev/null 2>&1 && available_collectors+=("bash")
+command -v python3 >/dev/null 2>&1 && available_collectors+=("python")
+command -v perl >/dev/null 2>&1 && available_collectors+=("perl")
+command -v pwsh >/dev/null 2>&1 && available_collectors+=("ps3" "ps2")
+
+echo "Available collectors: ${available_collectors[*]}"
+echo ""
+
+for collector in "${available_collectors[@]}"; do
+    printf "\n${CYAN}── %s ──${RESET}\n" "$collector"
+
+    # Create unique fixtures for this collector
+    FIXTURES="$(setup_collector_fixtures "$collector")"
+
+    # Tests that take "$FIXTURES" share the tree created above; the others
+    # build and remove their own temporary tree.
+    test_malicious_detected "$collector" "$FIXTURES"
+    test_benign_no_match "$collector" "$FIXTURES"
+    test_filename_ioc "$collector" "$FIXTURES"
+    test_size_filter_no_event "$collector" "$FIXTURES"
+    test_large_malicious_detected "$collector" "$FIXTURES"
+    test_full_path_in_log "$collector" "$FIXTURES"
+    test_directory_scope "$collector"
+    test_age_filter "$collector"
+    test_extension_filter "$collector"
+    test_subdirectory_recursion "$collector"
+    test_empty_file "$collector"
+    test_unicode_filename "$collector"
+    test_symlink_not_followed "$collector"
+    test_broken_symlink "$collector"
+    test_special_chars_filename "$collector"
+    test_excluded_dirs_survive "$collector"
+    test_unreadable_file "$collector"
+    test_server_unreachable "$collector"
+    test_retry_on_late_server "$collector"
+
+    # NOTE(review): /tmp/x is not created anywhere in the visible part of this
+    # script — confirm it is a leftover of an earlier test before keeping it.
+    rm -rf "$FIXTURES" /tmp/x 2>/dev/null
+done
+
+stop_stub
+
+echo ""
+echo "============================================"
+printf " Results: ${GREEN}%d passed${RESET}, ${RED}%d failed${RESET}, ${YELLOW}%d skipped${RESET}\n" \
+    "$TESTS_PASSED" "$TESTS_FAILED" "$TESTS_SKIPPED"
+echo "============================================"
+
+if [ -n "$FAILED_NAMES" ]; then
+    # The names are data, not a format: pass them through %b so backslash
+    # escapes they contain are still expanded, but a literal '%' in a test
+    # name can no longer be misread as a conversion specifier.
+    printf "\nFailed tests:\n%b\n" "$FAILED_NAMES"
+fi
+
+# The script's exit status reflects whether any test failed
+[ "$TESTS_FAILED" -eq 0 ]
diff --git a/scripts/tests/run_e2e_compliance.sh b/scripts/tests/run_e2e_compliance.sh
new file mode 100755
index 0000000..610393a
--- /dev/null
+++ b/scripts/tests/run_e2e_compliance.sh
@@ -0,0 +1,414 @@
+#!/usr/bin/env bash
+#
+# End-to-End Compliance Tests for Thunderstorm Collector Scripts
+#
+# Verifies that each collector sends correctly formatted multipart uploads
+# with proper metadata fields that a Thunderstorm server can parse.
+#
+# Tests run against a stub server with JSONL audit log for field verification.
+# Checks: source, filename, file integrity (MD5), collection markers,
+#         zero-byte files, binary files, filenames with spaces/special chars.
+#
+# Usage:
+#   ./run_e2e_compliance.sh [stub-server-binary]
+#
+# Environment:
+#   STUB_SERVER_BIN      Path to stub server binary
+#   THUNDERSTORM_HOST    Real Thunderstorm host (optional, for live smoke tests)
+#   THUNDERSTORM_PORT    Real Thunderstorm port (default: 8081)
+#
+
+set -euo pipefail
+
+TESTS_DIR="$(cd "$(dirname "$0")" && pwd)"
+SCRIPTS_DIR="$(cd "$TESTS_DIR/.." && pwd)"
+
+STUB_PORT=19993
+STUB_LOG="/tmp/e2e-compliance.jsonl"
+STUB_PID=""
+
+TS_HOST="${THUNDERSTORM_HOST:-}"
+TS_PORT="${THUNDERSTORM_PORT:-8081}"
+
+FIXTURES="/tmp/e2e-compliance-fixtures"
+PASS=0
+FAIL=0
+SKIP=0
+
+RED='\033[31m'; GREEN='\033[32m'; YELLOW='\033[33m'; CYAN='\033[36m'; BOLD='\033[1m'; RESET='\033[0m'
+
+pass()    { PASS=$((PASS+1)); printf "  ${GREEN}PASS${RESET} %s\n" "$1"; }
+fail()    { FAIL=$((FAIL+1)); printf "  ${RED}FAIL${RESET} %s\n" "$1"; }
+skip()    { SKIP=$((SKIP+1)); printf "  ${YELLOW}SKIP${RESET} %s\n" "$1"; }
+section() { printf "\n${BOLD}${CYAN}── %s ──${RESET}\n" "$1"; }
+
+# ── Stub Server ───────────────────────────────────────────────────────────────
+
+find_stub() {
+    if [ -n "${1:-}" ] && [ -x "$1" ]; then echo "$1"; return 0; fi
+    if [ -n "${STUB_SERVER_BIN:-}" ] && [ -x "$STUB_SERVER_BIN" ]; then echo "$STUB_SERVER_BIN"; return 0; fi
+    local sibling="$SCRIPTS_DIR/../../thunderstorm-stub-server/thunderstorm-stub-server"
+    if [ -x "$sibling" ]; then echo "$sibling"; return 0; fi
+    for p in \
+        "$HOME/.openclaw/workspace/projects/thunderstorm-stub-server/thunderstorm-stub-server" \
+        "$HOME/thunderstorm-stub-server/thunderstorm-stub-server"; do
+        if [ -x "$p" ]; then echo "$p"; return 0; fi
+    done
+    command -v thunderstorm-stub-server 2>/dev/null && return 0
+    return 1
+}
+
+start_stub() {
+    pkill -f "stub-server.*$STUB_PORT" 2>/dev/null || true
+    sleep 1
+    rm -f "$STUB_LOG"
+    "$1" -port "$STUB_PORT" -log-file "$STUB_LOG" &
+    STUB_PID=$!
+    sleep 2
+    if ! curl -sf "http://127.0.0.1:$STUB_PORT/api/status" >/dev/null 2>&1; then
+        echo "ERROR: Stub server failed to start on port $STUB_PORT"; exit 1
+    fi
+}
+
+stop_stub() { [ -n "$STUB_PID" ] && kill "$STUB_PID" 2>/dev/null && wait "$STUB_PID" 2>/dev/null || true; STUB_PID=""; }
+
+cleanup() { stop_stub; rm -rf "$FIXTURES"; }
+trap cleanup EXIT
+
+# ── Fixtures ──────────────────────────────────────────────────────────────────
+
+# Rebuild the fixture tree under $FIXTURES from scratch: eight files covering
+# plain text, names with spaces and special characters, binary content with
+# NUL bytes, nested directories, and a zero-byte file.
+create_fixtures() {
+    local root="$FIXTURES"
+
+    rm -rf "$root"
+    mkdir -p "$root/subdir with spaces" "$root/nested/deep"
+
+    # Top-level text files
+    echo "hello world" > "$root/normal.txt"
+    echo "spaced" > "$root/file with spaces.txt"
+    echo "special" > "$root/special-chars_v2.0(1).txt"
+    echo "report" > "$root/report-2024.txt"
+
+    # Binary payload including NUL and high bytes
+    printf '\x00\x01\x02\x03DEADBEEF\x00\xff\xfe' > "$root/binary.bin"
+
+    # Files below the top level
+    echo "nested space" > "$root/subdir with spaces/inner.txt"
+    echo "deep" > "$root/nested/deep/deep.txt"
+
+    # Zero-byte file (the tree was just wiped, so this always creates it)
+    : > "$root/empty.txt"
+}
+
+# ── JSONL Helpers ─────────────────────────────────────────────────────────────
+
+jsonl_count() { wc -l < "$STUB_LOG" 2>/dev/null | tr -d ' '; }
+
+# Get upload entries (type="THOR finding") since line N
+# Prints the matching log lines unchanged. Lines that are not JSON objects
+# are skipped. A missing log yields no output and status 0: with pipefail a
+# failing `tail` would otherwise make the caller's assignment abort the script
+# under `set -e` before it can report "no uploads recorded".
+jsonl_uploads_since() {
+    [ -f "$STUB_LOG" ] || return 0
+    tail -n +"$1" "$STUB_LOG" 2>/dev/null | python3 -c "
+import sys, json
+for line in sys.stdin:
+    line = line.strip()
+    if not line: continue
+    try:
+        d = json.loads(line)
+        if d.get('type') == 'THOR finding': print(line)
+    except Exception:
+        continue
+"
+}
+
+# Get all entries since line N
+jsonl_since() { tail -n +"$1" "$STUB_LOG" 2>/dev/null; }
+
+# Extract a dotted field path from a JSON line
+jf() {
+    echo "$1" | python3 -c "
+import sys, json
+data = json.load(sys.stdin)
+keys = '${2}'.split('.')
+val = data
+for k in keys:
+    val = val.get(k) if isinstance(val, dict) else None
+    if val is None: break
+if val is not None: print(val)
+" 2>/dev/null
+}
+
+# Find first upload entry matching a client_filename substring
+find_upload() {
+    echo "$1" | python3 -c "
+import sys, json
+target = '${2}'
+for line in sys.stdin:
+    line = line.strip()
+    if not line: continue
+    d = json.loads(line)
+    cf = d.get('subject',{}).get('client_filename','')
+    if target in cf: print(line); break
+" 2>/dev/null
+}
+
+# Find marker entry by type
+find_marker() {
+    echo "$1" | python3 -c "
+import sys, json
+target = '${2}'
+for line in sys.stdin:
+    line = line.strip()
+    if not line: continue
+    d = json.loads(line)
+    if d.get('type') == 'collection_marker' and d.get('marker') == target: print(line); break
+" 2>/dev/null
+}
+
+# ── Assertions ────────────────────────────────────────────────────────────────
+
+assert_eq()       { [ "$(jf "$1" "$2")" = "$3" ] && pass "$4" || fail "$4: expected='$3' got='$(jf "$1" "$2")'"; }
+assert_nonempty() { [ -n "$(jf "$1" "$2")" ] && pass "$3: $(jf "$1" "$2")" || fail "$3: empty"; }
+assert_md5()      { local exp; exp=$(md5sum "$2" | awk '{print $1}'); local got; got=$(jf "$1" "subject.hashes.md5"); [ "$exp" = "$got" ] && pass "$3: MD5 $exp" || fail "$3: MD5 expected=$exp got=$got"; }
+
+# ── Test Runner ───────────────────────────────────────────────────────────────
+
+# run_tests NAME COMMAND...
+# Runs COMMAND with `--source <value>` appended against the stub server, then
+# inspects only the log lines written since the run started. Checks, in order:
+# source field, begin/end collection markers, MD5 integrity of four fixture
+# files, zero-byte handling, nested and space-containing directories, and the
+# total number of uploads. Every check is reported through pass/fail.
+run_tests() {
+    local name="$1"; shift
+    local source_val="E2E Test (v2.0)"
+    local start_line uploads all_entries entry
+
+    section "$name"
+
+    # Remember where the log ends now so earlier collectors' lines are ignored
+    start_line=$(($(jsonl_count) + 1))
+    "$@" --source "$source_val" > /dev/null 2>&1 || true
+    sleep 2
+
+    uploads=$(jsonl_uploads_since "$start_line")
+    all_entries=$(jsonl_since "$start_line")
+
+    if [ -z "$uploads" ]; then
+        fail "$name: no uploads recorded (collector may have crashed)"
+        return
+    fi
+
+    # Source parameter arrives correctly.
+    # The first line is taken by parameter expansion rather than `echo | head`:
+    # under pipefail, head closing the pipe early can fail the whole pipeline.
+    entry=${uploads%%$'\n'*}
+    assert_eq "$entry" "subject.source" "$source_val" "$name/source"
+
+    # Collection markers
+    local begin_m; begin_m=$(find_marker "$all_entries" "begin")
+    local end_m; end_m=$(find_marker "$all_entries" "end")
+    if [ -n "$begin_m" ]; then
+        pass "$name/marker-begin"
+        assert_nonempty "$begin_m" "collector" "$name/marker-collector"
+        assert_eq "$begin_m" "source" "$source_val" "$name/marker-source"
+    else
+        fail "$name/marker-begin: not found"
+    fi
+    if [ -n "$end_m" ]; then
+        pass "$name/marker-end"
+    else
+        fail "$name/marker-end: not found"
+    fi
+
+    # File content integrity — text
+    entry=$(find_upload "$uploads" "normal.txt")
+    if [ -n "$entry" ]; then
+        assert_md5 "$entry" "$FIXTURES/normal.txt" "$name/integrity-text"
+    else
+        fail "$name/integrity-text: not found"
+    fi
+
+    # File content integrity — binary with NUL bytes
+    entry=$(find_upload "$uploads" "binary.bin")
+    if [ -n "$entry" ]; then
+        assert_md5 "$entry" "$FIXTURES/binary.bin" "$name/integrity-binary"
+    else
+        fail "$name/integrity-binary: not found"
+    fi
+
+    # Filename with spaces
+    entry=$(find_upload "$uploads" "file with spaces")
+    if [ -n "$entry" ]; then
+        assert_md5 "$entry" "$FIXTURES/file with spaces.txt" "$name/spaces-in-name"
+    else
+        fail "$name/spaces-in-name: not found"
+    fi
+
+    # Special characters in filename
+    entry=$(find_upload "$uploads" "special-chars")
+    if [ -n "$entry" ]; then
+        assert_md5 "$entry" "$FIXTURES/special-chars_v2.0(1).txt" "$name/special-chars"
+    else
+        fail "$name/special-chars: not found"
+    fi
+
+    # Zero-byte file
+    entry=$(find_upload "$uploads" "empty.txt")
+    if [ -n "$entry" ]; then
+        local sz; sz=$(jf "$entry" "subject.size")
+        if [ "$sz" = "0" ]; then
+            pass "$name/zero-byte"
+        else
+            fail "$name/zero-byte: size=$sz"
+        fi
+    else
+        fail "$name/zero-byte: not found"
+    fi
+
+    # Nested directory
+    entry=$(find_upload "$uploads" "deep.txt")
+    if [ -n "$entry" ]; then
+        pass "$name/nested-dir"
+    else
+        fail "$name/nested-dir: not found"
+    fi
+
+    # Subdirectory with spaces
+    entry=$(find_upload "$uploads" "inner.txt")
+    if [ -n "$entry" ]; then
+        pass "$name/subdir-spaces"
+    else
+        fail "$name/subdir-spaces: not found"
+    fi
+
+    # Total count: the fixture tree holds eight files
+    local n; n=$(echo "$uploads" | wc -l | tr -d ' ')
+    if [ "$n" -ge 8 ]; then
+        pass "$name/count: $n files"
+    else
+        fail "$name/count: $n files (expected ≥8)"
+    fi
+}
+
+# PowerShell wrapper (uses -Source instead of --source)
+# run_tests_ps NAME SCRIPT
+# Runs the PowerShell collector SCRIPT through pwsh against the stub server
+# and checks a reduced set of properties on the log lines written since the
+# run started: source field, MD5 integrity of three fixture files, zero-byte
+# handling, and a minimum upload count of five.
+run_tests_ps() {
+    local name="$1" script="$2"
+    local source_val="E2E Test (v2.0)"
+    local start_line uploads entry
+
+    section "$name"
+
+    # Remember where the log ends now so earlier collectors' lines are ignored
+    start_line=$(($(jsonl_count) + 1))
+    pwsh -NoProfile -ep bypass -c "& '$script' \
+        -ThunderstormServer '127.0.0.1' -ThunderstormPort $STUB_PORT \
+        -Folder '$FIXTURES' -MaxAge 365 -AllExtensions \
+        -Source '$source_val'" > /dev/null 2>&1 || true
+    sleep 2
+
+    uploads=$(jsonl_uploads_since "$start_line")
+    if [ -z "$uploads" ]; then
+        fail "$name: no uploads recorded (collector may have crashed)"
+        return
+    fi
+
+    # First line via parameter expansion rather than `echo | head`: under
+    # pipefail, head closing the pipe early can fail the whole pipeline.
+    entry=${uploads%%$'\n'*}
+    assert_eq "$entry" "subject.source" "$source_val" "$name/source"
+
+    entry=$(find_upload "$uploads" "normal.txt")
+    if [ -n "$entry" ]; then
+        assert_md5 "$entry" "$FIXTURES/normal.txt" "$name/integrity-text"
+    else
+        fail "$name/integrity-text"
+    fi
+
+    entry=$(find_upload "$uploads" "binary.bin")
+    if [ -n "$entry" ]; then
+        assert_md5 "$entry" "$FIXTURES/binary.bin" "$name/integrity-binary"
+    else
+        fail "$name/integrity-binary"
+    fi
+
+    entry=$(find_upload "$uploads" "file with spaces")
+    if [ -n "$entry" ]; then
+        assert_md5 "$entry" "$FIXTURES/file with spaces.txt" "$name/spaces-in-name"
+    else
+        fail "$name/spaces-in-name"
+    fi
+
+    entry=$(find_upload "$uploads" "empty.txt")
+    if [ -n "$entry" ]; then
+        local sz; sz=$(jf "$entry" "subject.size")
+        if [ "$sz" = "0" ]; then
+            pass "$name/zero-byte"
+        else
+            fail "$name/zero-byte: size=$sz"
+        fi
+    else
+        fail "$name/zero-byte"
+    fi
+
+    local n; n=$(echo "$uploads" | wc -l | tr -d ' ')
+    if [ "$n" -ge 5 ]; then
+        pass "$name/count: $n files"
+    else
+        fail "$name/count: $n files (expected ≥5)"
+    fi
+}
+
+# run_dry_run_test NAME COMMAND...
+# Runs COMMAND with --dry-run appended and passes when no upload entry was
+# added to the stub log during that run.
+run_dry_run_test() {
+    local name="$1"; shift
+    local first_new_line upload_count
+
+    first_new_line=$(($(jsonl_count) + 1))
+    "$@" --dry-run > /dev/null 2>&1 || true
+    sleep 1
+
+    upload_count=$(jsonl_uploads_since "$first_new_line" | wc -l | tr -d ' ')
+    [ "$upload_count" -eq 0 ] \
+        && pass "$name/dry-run" \
+        || fail "$name/dry-run: $upload_count uploads (should be 0)"
+}
+
+# ── Main ──────────────────────────────────────────────────────────────────────
+
+echo ""
+echo "============================================"
+echo " E2E Compliance Tests"
+echo " Stub: 127.0.0.1:$STUB_PORT"
+[ -n "$TS_HOST" ] && echo " Thunderstorm: $TS_HOST:$TS_PORT"
+echo "============================================"
+
+STUB_BIN=$(find_stub "${1:-}" || true)
+if [ -z "$STUB_BIN" ]; then
+    echo "ERROR: Cannot find stub server binary"; exit 1
+fi
+echo "Stub: $STUB_BIN"
+start_stub "$STUB_BIN"
+create_fixtures
+
+# Bash
+run_tests "bash" bash "$SCRIPTS_DIR/thunderstorm-collector.sh" \
+    --server 127.0.0.1 --port "$STUB_PORT" --dir "$FIXTURES" --max-age 365 --quiet
+run_dry_run_test "bash" bash "$SCRIPTS_DIR/thunderstorm-collector.sh" \
+    --server 127.0.0.1 --port "$STUB_PORT" --dir "$FIXTURES" --max-age 365 --quiet
+
+# Ash / POSIX sh
+if command -v dash >/dev/null 2>&1; then
+    run_tests "ash (dash)" dash "$SCRIPTS_DIR/thunderstorm-collector-ash.sh" \
+        --server 127.0.0.1 --port "$STUB_PORT" --dir "$FIXTURES" --max-age 365 --quiet
+    run_dry_run_test "ash (dash)" dash "$SCRIPTS_DIR/thunderstorm-collector-ash.sh" \
+        --server 127.0.0.1 --port "$STUB_PORT" --dir "$FIXTURES" --max-age 365 --quiet
+else
+    section "ash"; skip "no dash or busybox available"
+fi
+
+# Python 3
+if ! command -v python3 >/dev/null 2>&1; then
+    section "python3"; skip "not available"
+else
+    py3_cmd=(python3 "$SCRIPTS_DIR/thunderstorm-collector.py"
+        -s 127.0.0.1 -p "$STUB_PORT" -d "$FIXTURES" --max-age 365)
+    run_tests "python3" "${py3_cmd[@]}"
+    run_dry_run_test "python3" "${py3_cmd[@]}"
+fi
+
+# Python 2
+if ! command -v python2 >/dev/null 2>&1; then
+    section "python2"; skip "not available"
+else
+    py2_cmd=(python2 "$SCRIPTS_DIR/thunderstorm-collector-py2.py"
+        -s 127.0.0.1 -p "$STUB_PORT" -d "$FIXTURES" --max-age 365)
+    run_tests "python2" "${py2_cmd[@]}"
+    run_dry_run_test "python2" "${py2_cmd[@]}"
+fi
+
+# Perl: needs the interpreter and the LWP::UserAgent module.
+if command -v perl >/dev/null 2>&1 && perl -MLWP::UserAgent -e1 2>/dev/null; then
+    perl_cmd=(perl "$SCRIPTS_DIR/thunderstorm-collector.pl"
+        -s 127.0.0.1 --port "$STUB_PORT" --dir "$FIXTURES" --max-age 365)
+    run_tests "perl" "${perl_cmd[@]}"
+    run_dry_run_test "perl" "${perl_cmd[@]}"
+else
+    section "perl"; skip "not available or missing LWP::UserAgent"
+fi
+
+# PowerShell 3+ and 2+ collectors both run under pwsh, so probe for it once.
+if command -v pwsh >/dev/null 2>&1; then
+    run_tests_ps "powershell3+" "$SCRIPTS_DIR/thunderstorm-collector.ps1"
+    run_tests_ps "powershell2+" "$SCRIPTS_DIR/thunderstorm-collector-ps2.ps1"
+else
+    section "powershell3+"; skip "pwsh not available"
+    section "powershell2+"; skip "pwsh not available"
+fi
+
+# Real Thunderstorm smoke tests
+if [ -n "$TS_HOST" ]; then
+    section "Real Thunderstorm ($TS_HOST:$TS_PORT)"
+    if curl -sf "http://$TS_HOST:$TS_PORT/api/status" >/dev/null 2>&1; then
+        pass "connectivity: server reachable"
+        TS_FIX="/tmp/e2e-ts-smoke"
+        rm -rf "$TS_FIX"; mkdir -p "$TS_FIX"
+        echo "live test" > "$TS_FIX/live.txt"
+        printf '\x00BINARY\x00' > "$TS_FIX/live.bin"
+
+        for info in \
+            "bash:bash $SCRIPTS_DIR/thunderstorm-collector.sh --server $TS_HOST --port $TS_PORT --dir $TS_FIX --max-age 365 --quiet" \
+            "python3:python3 $SCRIPTS_DIR/thunderstorm-collector.py -s $TS_HOST -p $TS_PORT -d $TS_FIX --max-age 365" \
+            "perl:perl $SCRIPTS_DIR/thunderstorm-collector.pl -s $TS_HOST --port $TS_PORT --dir $TS_FIX --max-age 365" \
+            "ps3:pwsh -NoProfile -ep bypass -c \"& '$SCRIPTS_DIR/thunderstorm-collector.ps1' -ThunderstormServer $TS_HOST -ThunderstormPort $TS_PORT -Folder '$TS_FIX' -MaxAge 365 -AllExtensions\""; do
+            n="${info%%:*}"; c="${info#*:}"
+            if eval "$c" >/dev/null 2>&1; then
+                pass "live/$n: upload succeeded"
+            else
+                fail "live/$n: upload failed"
+            fi
+        done
+        rm -rf "$TS_FIX"
+    else
+        fail "connectivity: unreachable at $TS_HOST:$TS_PORT"
+    fi
+fi
+
+echo ""
+echo "============================================"
+printf " Results: ${GREEN}%d passed${RESET}, ${RED}%d failed${RESET}, ${YELLOW}%d skipped${RESET}\n" "$PASS" "$FAIL" "$SKIP"
+echo "============================================"
+[ "$FAIL" -eq 0 ] && exit 0 || exit 1
diff --git a/scripts/tests/run_filter_tests.sh b/scripts/tests/run_filter_tests.sh
new file mode 100755
index 0000000..5d58296
--- /dev/null
+++ b/scripts/tests/run_filter_tests.sh
@@ -0,0 +1,367 @@
+#!/usr/bin/env bash
+#
+# Filter / Selector Tests for All Script Collectors
+# Tests: --max-age, --max-size, and extension filtering
+#
+# Requires: stub server running on $STUB_PORT, test fixtures in $FIXTURES_DIR
+#
+set -euo pipefail
+
+# Stub endpoint, stub log and fixture tree; each keeps a value supplied by the
+# environment and otherwise falls back to the default shown.
+: "${STUB_HOST:=127.0.0.1}"
+: "${STUB_PORT:=19990}"
+: "${STUB_LOG:=/tmp/stub-filter-test.jsonl}"
+: "${FIXTURES_DIR:=/tmp/filter-test-fixtures}"
+
+# Collector scripts live one directory above this test script.
+SCRIPTS_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+
+# Scratch directory for patched collector copies; removed when the script exits.
+TMP_DIR="$(mktemp -d)"
+trap 'rm -rf "$TMP_DIR"' EXIT
+
+# Result counters.
+PASS=0 FAIL=0 SKIP=0
+
+pass() { PASS=$((PASS+1)); printf "  \033[32mPASS\033[0m %s\n" "$1"; }
+fail() { FAIL=$((FAIL+1)); printf "  \033[31mFAIL\033[0m %s\n" "$1"; }
+skip() { SKIP=$((SKIP+1)); printf "  \033[33mSKIP\033[0m %s\n" "$1"; }
+
+# Print the sorted basenames of all files uploaded after a given point in the
+# stub server's JSONL log ($STUB_LOG).
+#   $1 - number of log lines that already existed before the collector run
+#        (the value returned by log_lines); those lines are skipped
+# Prints one basename per line, taken from each entry's client_filename field;
+# prints nothing when the log is missing or holds no matching entries.
+get_uploaded_files() {
+    local seen_lines="${1:-0}"
+    # tail -n +K starts AT line K, so begin one past the lines already seen;
+    # otherwise the last entry of the previous run is counted again.
+    # Basenames are cut with sed instead of `xargs basename`, which mangles
+    # names containing quotes or backslashes and forks once per file.
+    tail -n +"$((seen_lines + 1))" "$STUB_LOG" 2>/dev/null \
+        | grep -o '"client_filename":"[^"]*"' \
+        | sed -e 's/^"client_filename":"//' -e 's/"$//' -e 's|.*/||' \
+        | sort
+}
+
+log_lines() {
+    wc -l < "$STUB_LOG" 2>/dev/null | tr -d ' '
+}
+
+assert_uploaded() {
+    local start="$1" filename="$2" label="$3"
+    if get_uploaded_files "$start" | grep -qF "$filename"; then
+        pass "$label: '$filename' uploaded"
+    else
+        fail "$label: '$filename' NOT uploaded (expected)"
+    fi
+}
+
+assert_not_uploaded() {
+    local start="$1" filename="$2" label="$3"
+    if get_uploaded_files "$start" | grep -qF "$filename"; then
+        fail "$label: '$filename' uploaded (should be filtered)"
+    else
+        pass "$label: '$filename' filtered out"
+    fi
+}
+
+# Create patched copies of Python/Perl collectors with specific max_age/max_size
+patch_python() {
+    local max_age="$1" max_size_kb="$2" out="$TMP_DIR/thunderstorm-collector-patched.py"
+    # Patch both the global default and the argparse default
+    sed -e "s/^max_age = .*/max_age = $max_age/" \
+        -e "s/^max_size = .*/max_size = $max_size_kb/" \
+        -e "s/\"--max-size-kb\", type=int, default=[0-9]*/\"--max-size-kb\", type=int, default=$max_size_kb/" \
+        -e "s/\"--max-age\", type=int, default=[0-9]*/\"--max-age\", type=int, default=$max_age/" \
+        "$SCRIPTS_DIR/thunderstorm-collector.py" > "$out"
+    echo "$out"
+}
+
+patch_python2() {
+    local max_age="$1" max_size_kb="$2" out="$TMP_DIR/thunderstorm-collector-py2-patched.py"
+    # Patch both the global default and the argparse default
+    sed -e "s/^max_age = .*/max_age = $max_age/" \
+        -e "s/^max_size = .*/max_size = $max_size_kb/" \
+        -e "s/\"--max-size-kb\", type=int, default=[0-9]*/\"--max-size-kb\", type=int, default=$max_size_kb/" \
+        -e "s/\"--max-age\", type=int, default=[0-9]*/\"--max-age\", type=int, default=$max_age/" \
+        "$SCRIPTS_DIR/thunderstorm-collector-py2.py" > "$out"
+    echo "$out"
+}
+
+# Write a copy of the Perl collector into $TMP_DIR with its `our $max_age`
+# and `our $max_size_kb` defaults replaced, and print the path of that copy.
+#   $1 - max_age value to patch in
+#   $2 - max_size_kb value to patch in
+patch_perl() {
+    local age="$1" size_kb="$2"
+    local patched="$TMP_DIR/thunderstorm-collector-patched.pl"
+    # Single-quoted pieces keep the Perl sigils literal; only the new values
+    # are expanded by the shell.
+    local age_expr='s/^our \$max_age = .*/our $max_age = '"$age"';/'
+    local size_expr='s/^our \$max_size_kb = .*/our $max_size_kb = '"$size_kb"';/'
+    sed -e "$age_expr" -e "$size_expr" \
+        "$SCRIPTS_DIR/thunderstorm-collector.pl" > "$patched"
+    printf '%s\n' "$patched"
+}
+
+# Ensure stub server is running
+if ! curl -s "http://${STUB_HOST}:${STUB_PORT}/api/status" >/dev/null 2>&1; then
+    echo "ERROR: Stub server not running on ${STUB_HOST}:${STUB_PORT}"
+    exit 1
+fi
+
+if [ ! -d "$FIXTURES_DIR" ]; then
+    echo "ERROR: Fixtures directory not found: $FIXTURES_DIR"
+    exit 1
+fi
+
+echo "============================================"
+echo " Filter / Selector Tests"
+echo " Server: ${STUB_HOST}:${STUB_PORT}"
+echo " Fixtures: ${FIXTURES_DIR}"
+echo "============================================"
+echo ""
+
+# ══════════════════════════════════════════════
+# BASH COLLECTOR
+# ══════════════════════════════════════════════
+echo "── Bash Collector ──────────────────────────"
+
+# max-size: 1000KB limit → small(100B), fresh(6B), old(4B), ancient(8B),
+#   medium(500KB) pass; large(3MB), huge(25MB) filtered
+# Also passes: sample.exe(12B), sample.dll(12B), photo.jpg(12B), settings.conf(13B), noext(13B), nested.txt(7B)
+start=$(log_lines)
+bash "$SCRIPTS_DIR/thunderstorm-collector.sh" \
+    --server "$STUB_HOST" --port "$STUB_PORT" \
+    --dir "$FIXTURES_DIR" --max-size-kb 1000 --max-age 365 --quiet 2>/dev/null || true
+sleep 1
+assert_uploaded     "$start" "small.txt"    "bash/max-size-1000KB"
+assert_uploaded     "$start" "medium.bin"   "bash/max-size-1000KB"
+assert_not_uploaded "$start" "large.bin"    "bash/max-size-1000KB"
+assert_not_uploaded "$start" "huge.bin"     "bash/max-size-1000KB"
+
+# max-age: 7 days → only files created today pass (fresh, small, medium, large, huge, extensions, nested, noext)
+# old(30d) and ancient(90d) filtered
+start=$(log_lines)
+bash "$SCRIPTS_DIR/thunderstorm-collector.sh" \
+    --server "$STUB_HOST" --port "$STUB_PORT" \
+    --dir "$FIXTURES_DIR" --max-age 7 --max-size-kb 50000 --quiet 2>/dev/null || true
+sleep 1
+assert_uploaded     "$start" "fresh.txt"    "bash/max-age-7d"
+assert_uploaded     "$start" "small.txt"    "bash/max-age-7d"
+assert_not_uploaded "$start" "old.txt"      "bash/max-age-7d"
+assert_not_uploaded "$start" "ancient.txt"  "bash/max-age-7d"
+
+# combined: 7 days + 200KB → only small fresh files
+start=$(log_lines)
+bash "$SCRIPTS_DIR/thunderstorm-collector.sh" \
+    --server "$STUB_HOST" --port "$STUB_PORT" \
+    --dir "$FIXTURES_DIR" --max-age 7 --max-size-kb 200 --quiet 2>/dev/null || true
+sleep 1
+assert_uploaded     "$start" "fresh.txt"    "bash/combined"
+assert_not_uploaded "$start" "medium.bin"   "bash/combined"
+assert_not_uploaded "$start" "old.txt"      "bash/combined"
+assert_not_uploaded "$start" "large.bin"    "bash/combined"
+
+echo ""
+
+# ══════════════════════════════════════════════
+# ASH / POSIX SH COLLECTOR
+# ══════════════════════════════════════════════
+if command -v dash >/dev/null 2>&1; then
+    ASH_SHELL="dash"
+elif command -v busybox >/dev/null 2>&1; then
+    ASH_SHELL="busybox sh"
+else
+    ASH_SHELL=""
+fi
+
+if [ -n "$ASH_SHELL" ]; then
+    echo "── POSIX sh Collector (via $ASH_SHELL) ──────"
+
+    start=$(log_lines)
+    $ASH_SHELL "$SCRIPTS_DIR/thunderstorm-collector-ash.sh" \
+        --server "$STUB_HOST" --port "$STUB_PORT" \
+        --dir "$FIXTURES_DIR" --max-size-kb 1000 --max-age 365 --quiet 2>/dev/null || true
+    sleep 1
+    assert_uploaded     "$start" "small.txt"    "ash/max-size-1000KB"
+    assert_uploaded     "$start" "medium.bin"   "ash/max-size-1000KB"
+    assert_not_uploaded "$start" "large.bin"    "ash/max-size-1000KB"
+    assert_not_uploaded "$start" "huge.bin"     "ash/max-size-1000KB"
+
+    start=$(log_lines)
+    $ASH_SHELL "$SCRIPTS_DIR/thunderstorm-collector-ash.sh" \
+        --server "$STUB_HOST" --port "$STUB_PORT" \
+        --dir "$FIXTURES_DIR" --max-age 7 --max-size-kb 50000 --quiet 2>/dev/null || true
+    sleep 1
+    assert_uploaded     "$start" "fresh.txt"    "ash/max-age-7d"
+    assert_not_uploaded "$start" "old.txt"      "ash/max-age-7d"
+    assert_not_uploaded "$start" "ancient.txt"  "ash/max-age-7d"
+
+    echo ""
+else
+    echo "── POSIX sh Collector ────────────────────────"
+    skip "neither dash nor busybox available"
+    echo ""
+fi
+
+# ══════════════════════════════════════════════
+# PYTHON 3 COLLECTOR
+# ══════════════════════════════════════════════
+echo "── Python 3 Collector ────────────────────────"
+
+# max-size test: 1024KB (~1MB), 365 days max_age
+py_script="$(patch_python 365 1024)"
+start=$(log_lines)
+python3 "$py_script" -s "$STUB_HOST" -p "$STUB_PORT" -d "$FIXTURES_DIR" 2>/dev/null || true
+sleep 1
+assert_uploaded     "$start" "small.txt"    "python3/max-size-1MB"
+assert_uploaded     "$start" "medium.bin"   "python3/max-size-1MB"
+assert_not_uploaded "$start" "large.bin"    "python3/max-size-1MB"
+assert_not_uploaded "$start" "huge.bin"     "python3/max-size-1MB"
+
+# max-age test: patch to 7 days max_age, 100MB max_size
+py_script="$(patch_python 7 50000)"
+start=$(log_lines)
+python3 "$py_script" -s "$STUB_HOST" -p "$STUB_PORT" -d "$FIXTURES_DIR" 2>/dev/null || true
+sleep 1
+assert_uploaded     "$start" "fresh.txt"    "python3/max-age-7d"
+assert_not_uploaded "$start" "old.txt"      "python3/max-age-7d"
+assert_not_uploaded "$start" "ancient.txt"  "python3/max-age-7d"
+
+# combined: 7 days + 200KB (only tiny fresh files; medium.bin is 500KB → filtered)
+py_script="$(patch_python 7 200)"
+start=$(log_lines)
+python3 "$py_script" -s "$STUB_HOST" -p "$STUB_PORT" -d "$FIXTURES_DIR" 2>/dev/null || true
+sleep 1
+assert_uploaded     "$start" "fresh.txt"    "python3/combined"
+assert_not_uploaded "$start" "medium.bin"   "python3/combined"
+assert_not_uploaded "$start" "old.txt"      "python3/combined"
+
+echo ""
+
+# ══════════════════════════════════════════════
+# PYTHON 2 COLLECTOR
+# ══════════════════════════════════════════════
+# The section header is printed whether or not python2 is installed.
+echo "── Python 2 Collector ────────────────────────"
+if ! command -v python2 >/dev/null 2>&1; then
+    skip "python2 not available"
+else
+    # Size filter: 1024KB limit, 365 days.
+    py2_script="$(patch_python2 365 1024)"
+    start=$(log_lines)
+    python2 "$py2_script" -s "$STUB_HOST" -p "$STUB_PORT" -d "$FIXTURES_DIR" 2>/dev/null || true
+    sleep 1
+    assert_uploaded     "$start" "small.txt"    "python2/max-size-1MB"
+    assert_not_uploaded "$start" "large.bin"    "python2/max-size-1MB"
+
+    # Age filter: 7 days, 50000KB limit.
+    py2_script="$(patch_python2 7 50000)"
+    start=$(log_lines)
+    python2 "$py2_script" -s "$STUB_HOST" -p "$STUB_PORT" -d "$FIXTURES_DIR" 2>/dev/null || true
+    sleep 1
+    assert_uploaded     "$start" "fresh.txt"    "python2/max-age-7d"
+    assert_not_uploaded "$start" "old.txt"      "python2/max-age-7d"
+fi
+echo ""
+
+# ══════════════════════════════════════════════
+# PERL COLLECTOR
+# ══════════════════════════════════════════════
+echo "── Perl Collector ────────────────────────────"
+
+# max-size test: 1024KB (~1MB), 365 days
+pl_script="$(patch_perl 365 1024)"
+start=$(log_lines)
+perl "$pl_script" -s "$STUB_HOST" --port "$STUB_PORT" --dir "$FIXTURES_DIR" 2>/dev/null || true
+sleep 1
+assert_uploaded     "$start" "small.txt"    "perl/max-size-1MB"
+assert_not_uploaded "$start" "large.bin"    "perl/max-size-1MB"
+assert_not_uploaded "$start" "huge.bin"     "perl/max-size-1MB"
+
+# max-age test: 7 days, 50000KB (~50MB, effectively no size limit)
+pl_script="$(patch_perl 7 50000)"
+start=$(log_lines)
+perl "$pl_script" -s "$STUB_HOST" --port "$STUB_PORT" --dir "$FIXTURES_DIR" 2>/dev/null || true
+sleep 1
+assert_uploaded     "$start" "fresh.txt"    "perl/max-age-7d"
+assert_not_uploaded "$start" "old.txt"      "perl/max-age-7d"
+assert_not_uploaded "$start" "ancient.txt"  "perl/max-age-7d"
+
+echo ""
+
+# ══════════════════════════════════════════════
+# POWERSHELL COLLECTORS
+# ══════════════════════════════════════════════
+# Both PowerShell collectors are driven through pwsh; the whole section is
+# skipped once when pwsh is not installed.
+if command -v pwsh >/dev/null 2>&1; then
+    echo "── PowerShell 3+ Collector ─────────────────"
+
+    # max-size: -MaxSize 1 (labelled 1MB below) — pass an explicit extension
+    # list covering the fixture types so the extension filter does not
+    # exclude them
+    start=$(log_lines)
+    pwsh -NoProfile -ep bypass -c "& '$SCRIPTS_DIR/thunderstorm-collector.ps1' \
+        -ThunderstormServer '$STUB_HOST' -ThunderstormPort $STUB_PORT \
+        -Folder '$FIXTURES_DIR' -MaxSize 1 -MaxAge 365 \
+        -Extensions @('.txt','.bin','.exe','.dll','.jpg','.conf')" 2>/dev/null || true
+    sleep 1
+    assert_uploaded     "$start" "small.txt"    "ps3/max-size-1MB"
+    assert_uploaded     "$start" "medium.bin"   "ps3/max-size-1MB"
+    assert_not_uploaded "$start" "large.bin"    "ps3/max-size-1MB"
+    assert_not_uploaded "$start" "huge.bin"     "ps3/max-size-1MB"
+
+    # max-age: 7 days
+    start=$(log_lines)
+    pwsh -NoProfile -ep bypass -c "& '$SCRIPTS_DIR/thunderstorm-collector.ps1' \
+        -ThunderstormServer '$STUB_HOST' -ThunderstormPort $STUB_PORT \
+        -Folder '$FIXTURES_DIR' -MaxAge 7 -MaxSize 100 \
+        -Extensions @('.txt','.bin','.exe','.dll','.jpg','.conf')" 2>/dev/null || true
+    sleep 1
+    assert_uploaded     "$start" "fresh.txt"    "ps3/max-age-7d"
+    assert_not_uploaded "$start" "old.txt"      "ps3/max-age-7d"
+    assert_not_uploaded "$start" "ancient.txt"  "ps3/max-age-7d"
+
+    # extension filtering: only .exe and .dll
+    start=$(log_lines)
+    pwsh -NoProfile -ep bypass -c "& '$SCRIPTS_DIR/thunderstorm-collector.ps1' \
+        -ThunderstormServer '$STUB_HOST' -ThunderstormPort $STUB_PORT \
+        -Folder '$FIXTURES_DIR' -MaxAge 365 -MaxSize 100 \
+        -Extensions @('.exe', '.dll')" 2>/dev/null || true
+    sleep 1
+    assert_uploaded     "$start" "sample.exe"   "ps3/ext-filter"
+    assert_uploaded     "$start" "sample.dll"   "ps3/ext-filter"
+    assert_not_uploaded "$start" "photo.jpg"    "ps3/ext-filter"
+    assert_not_uploaded "$start" "fresh.txt"    "ps3/ext-filter"
+    assert_not_uploaded "$start" "noext"        "ps3/ext-filter"
+
+    echo ""
+
+    echo "── PowerShell 2+ Collector ─────────────────"
+
+    # PS2 max-size: same invocation as the PS3 size test above, against the
+    # ps2 script
+    start=$(log_lines)
+    pwsh -NoProfile -ep bypass -c "& '$SCRIPTS_DIR/thunderstorm-collector-ps2.ps1' \
+        -ThunderstormServer '$STUB_HOST' -ThunderstormPort $STUB_PORT \
+        -Folder '$FIXTURES_DIR' -MaxSize 1 -MaxAge 365 \
+        -Extensions @('.txt','.bin','.exe','.dll','.jpg','.conf')" 2>/dev/null || true
+    sleep 1
+    assert_uploaded     "$start" "small.txt"    "ps2/max-size-1MB"
+    assert_not_uploaded "$start" "large.bin"    "ps2/max-size-1MB"
+
+    # PS2 max-age: 7 days
+    start=$(log_lines)
+    pwsh -NoProfile -ep bypass -c "& '$SCRIPTS_DIR/thunderstorm-collector-ps2.ps1' \
+        -ThunderstormServer '$STUB_HOST' -ThunderstormPort $STUB_PORT \
+        -Folder '$FIXTURES_DIR' -MaxAge 7 -MaxSize 100 \
+        -Extensions @('.txt','.bin','.exe','.dll','.jpg','.conf')" 2>/dev/null || true
+    sleep 1
+    assert_uploaded     "$start" "fresh.txt"    "ps2/max-age-7d"
+    assert_not_uploaded "$start" "old.txt"      "ps2/max-age-7d"
+
+    # PS2 extension filtering
+    start=$(log_lines)
+    pwsh -NoProfile -ep bypass -c "& '$SCRIPTS_DIR/thunderstorm-collector-ps2.ps1' \
+        -ThunderstormServer '$STUB_HOST' -ThunderstormPort $STUB_PORT \
+        -Folder '$FIXTURES_DIR' -MaxAge 365 -MaxSize 100 \
+        -Extensions @('.exe', '.dll')" 2>/dev/null || true
+    sleep 1
+    assert_uploaded     "$start" "sample.exe"   "ps2/ext-filter"
+    assert_uploaded     "$start" "sample.dll"   "ps2/ext-filter"
+    assert_not_uploaded "$start" "photo.jpg"    "ps2/ext-filter"
+
+    echo ""
+else
+    echo "── PowerShell Collectors ─────────────────────"
+    skip "pwsh not available"
+    echo ""
+fi
+
+# ══════════════════════════════════════════════
+# SUMMARY
+# ══════════════════════════════════════════════
+echo "============================================"
+echo " Results: $PASS passed, $FAIL failed, $SKIP skipped"
+echo "============================================"
+
+[ "$FAIL" -eq 0 ] && exit 0 || exit 1
diff --git a/scripts/tests/run_operational_tests.sh b/scripts/tests/run_operational_tests.sh
new file mode 100755
index 0000000..ae5d6d4
--- /dev/null
+++ b/scripts/tests/run_operational_tests.sh
@@ -0,0 +1,812 @@
+#!/usr/bin/env bash
+# ============================================================================
+# Operational Feature Tests
+#
+# Tests operational features not covered by detection tests:
+#
+#  1.  Collection markers — begin/end markers sent, scan_id propagated
+#  2.  Interrupted marker — SIGINT sends interrupted marker before exit
+#  3.  Dry-run mode — no uploads, no server contact (bash/python/perl only)
+#  4.  Source identifier — --source sets source field in collection markers
+#  5.  Sync mode — --sync uses /api/check instead of /api/checkAsync
+#  6.  Multiple scan directories — scanning multiple dirs in one run
+#  7.  503 back-pressure — server returns 503, collector retries with Retry-After
+#  8.  Progress reporting — --progress flag doesn't crash, produces output
+#  9.  Syslog logging — --syslog flag doesn't crash (bash only)
+# 10.  curl vs wget fallback — bash collector works with wget when curl absent
+#
+# Requires: thunderstorm-stub server with YARA support
+# ============================================================================
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+COLLECTOR_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+STUB_PORT="${STUB_PORT:-18200}"
+STUB_URL="http://localhost:${STUB_PORT}"
+STUB_LOG=""
+STUB_PID=""
+RULES_DIR=""
+
+# Colours
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+CYAN='\033[1;36m'
+BOLD='\033[1m'
+RESET='\033[0m'
+
+TESTS_PASSED=0
+TESTS_FAILED=0
+TESTS_SKIPPED=0
+FAILED_NAMES=""
+
+# ── Helpers ─────────────────────────────────────────────────────────────────
+
+pass() { printf "  ${GREEN}PASS${RESET} %s\n" "$*"; TESTS_PASSED=$((TESTS_PASSED + 1)); }
+fail() { printf "  ${RED}FAIL${RESET} %s\n" "$*"; TESTS_FAILED=$((TESTS_FAILED + 1)); FAILED_NAMES="$FAILED_NAMES  - $1\n"; }
+skip() { printf "  ${YELLOW}SKIP${RESET} %s\n" "$*"; TESTS_SKIPPED=$((TESTS_SKIPPED + 1)); }
+
+MALICIOUS_CONTENT='X5O!P%@AP[4\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-FILE!$H+H*'
+
+find_stub() {
+    local candidates=(
+        "${STUB_BIN_PATH:-}"
+        "$SCRIPT_DIR/../../../thunderstorm-stub-server/thunderstorm-stub"
+        "$(command -v thunderstorm-stub 2>/dev/null || true)"
+    )
+    for c in "${candidates[@]}"; do
+        [ -n "$c" ] && [ -x "$c" ] && echo "$c" && return 0
+    done
+    echo "ERROR: thunderstorm-stub not found" >&2
+    return 1
+}
+
+find_rules() {
+    local candidates=(
+        "${STUB_RULES_PATH:-}"
+        "$SCRIPT_DIR/../../../thunderstorm-stub-server/rules"
+    )
+    for c in "${candidates[@]}"; do
+        [ -n "$c" ] && [ -d "$c" ] && echo "$c" && return 0
+    done
+    echo "ERROR: rules directory not found" >&2
+    return 1
+}
+
+start_stub() {
+    local stub_bin; stub_bin="$(find_stub)"
+    RULES_DIR="$(find_rules)"
+    STUB_LOG="$(mktemp /tmp/oper-test-XXXXXX.jsonl)"
+
+    "$stub_bin" -port "$STUB_PORT" -rules-dir "$RULES_DIR" -log-file "$STUB_LOG" \
+        > /dev/null 2>&1 &
+    STUB_PID=$!
+    sleep 2
+    if ! curl -s "$STUB_URL/api/status" > /dev/null; then
+        echo "ERROR: stub failed to start on port $STUB_PORT" >&2
+        exit 1
+    fi
+}
+
+stop_stub() {
+    [ -n "$STUB_PID" ] && kill "$STUB_PID" 2>/dev/null && wait "$STUB_PID" 2>/dev/null || true
+    STUB_PID=""
+}
+
+# POST to the stub's /api/test/reset endpoint; output and failures are ignored.
+clear_log() {
+    local reset_url="$STUB_URL/api/test/reset"
+    curl -s -X POST "$reset_url" > /dev/null 2>&1 || true
+}
+
+query_log() {
+    local pattern="$1"
+    python3 -c "
+import json, sys
+for line in open('$STUB_LOG'):
+    line = line.strip()
+    if not line: continue
+    d = json.loads(line)
+    # Search in client_filename, type, marker fields
+    cf = d.get('subject', {}).get('client_filename', '')
+    mtype = d.get('type', '')
+    marker = d.get('marker', '')
+    source = d.get('source', '')
+    raw = json.dumps(d)
+    if '$pattern' in cf or '$pattern' in mtype or '$pattern' in marker or '$pattern' in source or '$pattern' in raw:
+        print(line)
+" 2>/dev/null
+}
+
+sync_stub() { sleep 1; }
+
+# Configure stub to return specific responses
+configure_stub() {
+    local config="$1"
+    curl -s -X POST "$STUB_URL/api/test/config" \
+        -H "Content-Type: application/json" \
+        -d "$config" > /dev/null
+}
+
+# Translate generic flags to PS parameter names
+_translate_ps_args() {
+    local -n out_args=$1; shift
+    while [ $# -gt 0 ]; do
+        case "$1" in
+            --max-size-kb) out_args+=("-MaxSize" "$(( $2 / 1024 ))"); shift 2 ;;
+            --max-age)     out_args+=("-MaxAge" "$2"); shift 2 ;;
+            *)             out_args+=("$1"); shift ;;
+        esac
+    done
+}
+
+run_collector() {
+    local name="$1"; shift
+    case "$name" in
+        bash)   run_bash "$@" ;;
+        python) run_python "$@" ;;
+        perl)   run_perl "$@" ;;
+        ps3)    run_ps3 "$@" ;;
+        ps2)    run_ps2 "$@" ;;
+    esac
+}
+
+run_bash() {
+    local dir="$1"; shift
+    bash "${COLLECTOR_DIR}/thunderstorm-collector.sh" \
+        --server localhost --port "$STUB_PORT" --dir "$dir" \
+        "$@" 2>&1
+}
+
+run_python() {
+    local dir="$1"; shift
+    python3 "${COLLECTOR_DIR}/thunderstorm-collector.py" \
+        --server localhost --port "$STUB_PORT" --dir "$dir" \
+        "$@" 2>&1
+}
+
+run_perl() {
+    local dir="$1"; shift
+    perl "${COLLECTOR_DIR}/thunderstorm-collector.pl" \
+        -s localhost -p "$STUB_PORT" --dir "$dir" \
+        "$@" 2>&1
+}
+
+# Run the PowerShell 3+ collector against the stub via pwsh.
+#   $1   - directory to scan
+#   $2.. - generic flags, translated by _translate_ps_args
+# stderr is merged into stdout.
+run_ps3() {
+    local scan_dir="$1"
+    shift
+    local -a ps_args=()
+    _translate_ps_args ps_args "$@"
+    local -a cmd=(
+        pwsh -NoProfile -File "${COLLECTOR_DIR}/thunderstorm-collector.ps1"
+        -ThunderstormServer localhost -ThunderstormPort "$STUB_PORT" -Folder "$scan_dir"
+    )
+    "${cmd[@]}" "${ps_args[@]}" 2>&1
+}
+
+# Run the PowerShell 2+ collector against the stub via pwsh.
+#   $1   - directory to scan
+#   $2.. - generic flags, translated by _translate_ps_args
+# stderr is merged into stdout.
+run_ps2() {
+    local scan_dir="$1"
+    shift
+    local -a ps_args=()
+    _translate_ps_args ps_args "$@"
+    local -a cmd=(
+        pwsh -NoProfile -File "${COLLECTOR_DIR}/thunderstorm-collector-ps2.ps1"
+        -ThunderstormServer localhost -ThunderstormPort "$STUB_PORT" -Folder "$scan_dir"
+    )
+    "${cmd[@]}" "${ps_args[@]}" 2>&1
+}
+
+# ============================================================================
+# Tests
+# ============================================================================
+
+# ── 1. Collection markers — begin/end with scan_id ─────────────────────────
+# Runs one collector over a single-file fixture directory, then checks the
+# stub log for a begin entry and an end entry, that both carry the same
+# non-empty scan_id, and that the end entry has stats mentioning "submitted".
+#   $1 - collector name understood by run_collector
+test_collection_markers() {
+    local collector="$1"
+    local label="$collector/collection-markers"
+    clear_log
+
+    local fixtures; fixtures="$(mktemp -d /tmp/oper-test-XXXXXX)"
+    echo "$MALICIOUS_CONTENT" > "$fixtures/marker-${collector}.exe"
+
+    run_collector "$collector" "$fixtures" --max-age 30 >/dev/null 2>&1 || true
+    sync_stub
+
+    # The end marker is only looked up once a begin marker was found.
+    local begin_entry end_entry=""
+    begin_entry="$(query_log 'begin')"
+    [ -z "$begin_entry" ] || end_entry="$(query_log 'end')"
+
+    if [ -z "$begin_entry" ]; then
+        fail "$label: no begin marker found"
+    elif [ -z "$end_entry" ]; then
+        fail "$label: no end marker found"
+    else
+        # scan_id must be present and identical; only the first matching
+        # entry of each kind is inspected.
+        local begin_scan_id end_scan_id
+        begin_scan_id="$(head -1 <<<"$begin_entry" | python3 -c "import json,sys; print(json.load(sys.stdin).get('scan_id',''))" 2>/dev/null)"
+        end_scan_id="$(head -1 <<<"$end_entry" | python3 -c "import json,sys; print(json.load(sys.stdin).get('scan_id',''))" 2>/dev/null)"
+
+        if [ -z "$begin_scan_id" ]; then
+            fail "$label: begin marker missing scan_id"
+        elif [ "$begin_scan_id" = "$end_scan_id" ]; then
+            pass "$label: begin+end markers with matching scan_id=$begin_scan_id"
+        else
+            fail "$label: scan_id mismatch (begin=$begin_scan_id end=$end_scan_id)"
+        fi
+
+        # The end marker is expected to carry stats.
+        local has_stats
+        has_stats="$(head -1 <<<"$end_entry" | python3 -c "
+import json, sys
+d = json.load(sys.stdin)
+stats = d.get('stats', {})
+print('yes' if stats and 'submitted' in str(stats) else 'no')
+" 2>/dev/null)"
+        if [ "$has_stats" = "yes" ]; then
+            pass "$collector/collection-markers-stats: end marker includes stats"
+        else
+            fail "$collector/collection-markers-stats: end marker missing stats"
+        fi
+    fi
+
+    rm -rf "$fixtures"
+}
+
+# ── 2. Interrupted marker via SIGINT ────────────────────────────────────────
+# Starts one collector in the background over 200 fixture files, waits for
+# its begin marker, sends SIGINT and checks the stub log for an interrupted
+# marker. An end marker instead is reported as a skip; neither is a failure.
+#   $1 - collector name: bash, python, perl, ps3 or ps2
+test_interrupted_marker() {
+    local collector="$1"
+    clear_log
+
+    # Create a large directory tree so the collector takes a while
+    local fixtures; fixtures="$(mktemp -d /tmp/oper-test-XXXXXX)"
+    local i
+    for i in $(seq 1 200); do
+        echo "$MALICIOUS_CONTENT" > "$fixtures/file-${collector}-${i}.exe"
+    done
+
+    # Command line per collector
+    local -a cmd
+    case "$collector" in
+        bash)
+            cmd=(bash "${COLLECTOR_DIR}/thunderstorm-collector.sh"
+                --server localhost --port "$STUB_PORT" --dir "$fixtures"
+                --max-age 30)
+            ;;
+        python)
+            cmd=(python3 "${COLLECTOR_DIR}/thunderstorm-collector.py"
+                -s localhost -p "$STUB_PORT" -d "$fixtures"
+                --max-age 30)
+            ;;
+        perl)
+            cmd=(perl "${COLLECTOR_DIR}/thunderstorm-collector.pl"
+                -s localhost -p "$STUB_PORT" --dir "$fixtures"
+                --max-age 30)
+            ;;
+        ps3)
+            cmd=(pwsh -NoProfile -File "${COLLECTOR_DIR}/thunderstorm-collector.ps1"
+                -ThunderstormServer localhost -ThunderstormPort "$STUB_PORT" -Folder "$fixtures"
+                -MaxAge 30)
+            ;;
+        ps2)
+            cmd=(pwsh -NoProfile -File "${COLLECTOR_DIR}/thunderstorm-collector-ps2.ps1"
+                -ThunderstormServer localhost -ThunderstormPort "$STUB_PORT" -Folder "$fixtures"
+                -MaxAge 30)
+            ;;
+        *)
+            fail "$collector/interrupted-marker: unknown collector"
+            rm -rf "$fixtures"
+            return
+            ;;
+    esac
+
+    # Start collector in background.
+    # A job started with `&` from a script (no job control) inherits SIGINT
+    # set to "ignore", and a shell cannot trap a signal that was ignored on
+    # entry, so a plain `cmd &` would never see the SIGINT sent below.
+    # The python3 trampoline restores the default disposition and then execs
+    # the collector in place, so $! is still the collector's PID.
+    python3 -c '
+import os, signal, sys
+signal.signal(signal.SIGINT, signal.SIG_DFL)
+os.execvp(sys.argv[1], sys.argv[1:])
+' "${cmd[@]}" > /dev/null 2>&1 &
+    local coll_pid=$!
+
+    # Wait for begin marker to appear (collector is running); at most 10 polls
+    local waited=0
+    while [ $waited -lt 10 ]; do
+        if query_log 'begin' | grep -q 'begin' 2>/dev/null; then
+            break
+        fi
+        sleep 0.5
+        waited=$((waited + 1))
+    done
+
+    # Send SIGINT (Ctrl-C)
+    kill -INT "$coll_pid" 2>/dev/null || true
+    # Wait for collector to finish
+    wait "$coll_pid" 2>/dev/null || true
+    sync_stub
+    sync_stub  # extra wait for marker
+
+    # Check for interrupted marker
+    local int_entry; int_entry="$(query_log 'interrupted')"
+    if [ -n "$int_entry" ]; then
+        pass "$collector/interrupted-marker: interrupted marker sent on SIGINT"
+    else
+        # Some collectors may not support interrupted markers
+        local end_entry; end_entry="$(query_log 'end')"
+        if [ -n "$end_entry" ]; then
+            # Sent end marker instead of interrupted — acceptable
+            skip "$collector/interrupted-marker: sent end marker instead of interrupted on SIGINT"
+        else
+            fail "$collector/interrupted-marker: no interrupted or end marker on SIGINT"
+        fi
+    fi
+
+    rm -rf "$fixtures"
+}
+
+# ── 3. Dry-run mode ────────────────────────────────────────────────────────
+# Runs one collector with --dry-run over a single-file fixture directory and
+# checks that the stub log has no entry for that file and that the
+# collector's output mentions the file or the dry run.
+#   $1 - collector name; ps3 and ps2 are skipped
+test_dry_run() {
+    local collector="$1"
+
+    # PS collectors don't support dry-run
+    case "$collector" in
+        ps3|ps2)
+            skip "$collector/dry-run: not supported"
+            return
+            ;;
+    esac
+
+    clear_log
+
+    local fixtures; fixtures="$(mktemp -d /tmp/oper-test-XXXXXX)"
+    echo "$MALICIOUS_CONTENT" > "$fixtures/dryrun-${collector}.exe"
+
+    # `|| true`: a non-zero collector exit must be judged by the checks below,
+    # not abort the whole suite through `set -e` as the other tests avoid too.
+    local output
+    output="$(run_collector "$collector" "$fixtures" --max-age 30 --dry-run 2>&1 || true)"
+    sync_stub
+
+    # Verify no uploads occurred
+    local upload_entry; upload_entry="$(query_log "dryrun-${collector}")"
+    if [ -n "$upload_entry" ]; then
+        fail "$collector/dry-run: file was uploaded (should not be)"
+    else
+        # Verify the dry-run output mentions the file
+        if echo "$output" | grep -qi "dryrun-${collector}\|dry.run\|would"; then
+            pass "$collector/dry-run: no upload, file listed in output"
+        else
+            fail "$collector/dry-run: no upload, but file not mentioned in output"
+        fi
+    fi
+
+    rm -rf "$fixtures"
+}
+
+# ── 4. Source identifier ────────────────────────────────────────────────────
+test_source_identifier() {
+    local collector="$1"
+    clear_log
+
+    local fixtures; fixtures="$(mktemp -d /tmp/oper-test-XXXXXX)"
+    echo "$MALICIOUS_CONTENT" > "$fixtures/source-${collector}.exe"
+
+    local source_name="test-source-${collector}"
+    case "$collector" in
+        bash)
+            run_bash "$fixtures" --max-age 30 --source "$source_name" >/dev/null 2>&1 || true
+            ;;
+        python)
+            python3 "${COLLECTOR_DIR}/thunderstorm-collector.py" \
+                -s localhost -p "$STUB_PORT" -d "$fixtures" \
+                --max-age 30 --source "$source_name" >/dev/null 2>&1 || true
+            ;;
+        perl)
+            perl "${COLLECTOR_DIR}/thunderstorm-collector.pl" \
+                -s localhost -p "$STUB_PORT" --dir "$fixtures" \
+                --max-age 30 --source "$source_name" >/dev/null 2>&1 || true
+            ;;
+        ps3)
+            pwsh -NoProfile -File "${COLLECTOR_DIR}/thunderstorm-collector.ps1" \
+                -ThunderstormServer localhost -ThunderstormPort "$STUB_PORT" -Folder "$fixtures" \
+                -MaxAge 30 -Source "$source_name" >/dev/null 2>&1 || true
+            ;;
+        ps2)
+            pwsh -NoProfile -File "${COLLECTOR_DIR}/thunderstorm-collector-ps2.ps1" \
+                -ThunderstormServer localhost -ThunderstormPort "$STUB_PORT" -Folder "$fixtures" \
+                -MaxAge 30 -Source "$source_name" >/dev/null 2>&1 || true
+            ;;
+    esac
+    sync_stub
+
+    # Check collection markers for source field
+    local marker_entry; marker_entry="$(query_log 'begin')"
+    if [ -n "$marker_entry" ]; then
+        local source_in_marker; source_in_marker="$(echo "$marker_entry" | head -1 | python3 -c "
+import json, sys
+d = json.load(sys.stdin)
+print(d.get('source', ''))
+" 2>/dev/null)"
+        if [ "$source_in_marker" = "$source_name" ]; then
+            pass "$collector/source-id: source='$source_name' in collection marker"
+        else
+            fail "$collector/source-id: expected source='$source_name', got source='$source_in_marker'"
+        fi
+    else
+        # Check if source is in the upload URL query params
+        local upload_entry; upload_entry="$(query_log "source-${collector}")"
+        if [ -n "$upload_entry" ]; then
+            local src_in_upload; src_in_upload="$(echo "$upload_entry" | head -1 | python3 -c "
+import json, sys
+d = json.load(sys.stdin)
+s = d.get('subject', {}).get('source', '')
+print(s)
+" 2>/dev/null)"
+            if [ "$src_in_upload" = "$source_name" ]; then
+                pass "$collector/source-id: source='$source_name' in upload"
+            else
+                pass "$collector/source-id: file uploaded (source may be in URL params)"
+            fi
+        else
+            fail "$collector/source-id: no marker or upload found"
+        fi
+    fi
+
+    rm -rf "$fixtures"
+}
+
+# ── 5. Sync mode ───────────────────────────────────────────────────────────
+test_sync_mode() {
+    local collector="$1"
+
+    # PS collectors don't support --sync flag
+    case "$collector" in
+        ps3|ps2)
+            skip "$collector/sync-mode: not supported (PS always uses checkAsync)"
+            return
+            ;;
+    esac
+
+    clear_log
+
+    local fixtures; fixtures="$(mktemp -d /tmp/oper-test-XXXXXX)"
+    echo "$MALICIOUS_CONTENT" > "$fixtures/sync-${collector}.exe"
+
+    case "$collector" in
+        bash)   run_bash "$fixtures" --max-age 30 --sync >/dev/null 2>&1 || true ;;
+        python) run_python "$fixtures" --max-age 30 --sync >/dev/null 2>&1 || true ;;
+        perl)   run_perl "$fixtures" --max-age 30 --sync >/dev/null 2>&1 || true ;;
+    esac
+    sync_stub
+
+    # In sync mode, the stub logs the scan immediately (no async queue)
+    local entry; entry="$(query_log "sync-${collector}")"
+    if [ -n "$entry" ]; then
+        local score; score="$(echo "$entry" | head -1 | python3 -c "import json,sys; print(json.load(sys.stdin).get('score',0))" 2>/dev/null)"
+        pass "$collector/sync-mode: file scanned synchronously (score=$score)"
+    else
+        fail "$collector/sync-mode: file not found in log"
+    fi
+
+    rm -rf "$fixtures"
+}
+
+# ── 6. Multiple scan directories ───────────────────────────────────────────
+test_multiple_dirs() {
+    local collector="$1"
+
+    # PS collectors only accept a single -Folder
+    case "$collector" in
+        ps3|ps2)
+            skip "$collector/multiple-dirs: PS accepts single -Folder only"
+            return
+            ;;
+    esac
+
+    clear_log
+
+    local dir1; dir1="$(mktemp -d /tmp/oper-test-XXXXXX)"
+    local dir2; dir2="$(mktemp -d /tmp/oper-test-XXXXXX)"
+    echo "$MALICIOUS_CONTENT" > "$dir1/multi1-${collector}.exe"
+    echo "$MALICIOUS_CONTENT" > "$dir2/multi2-${collector}.exe"
+
+    case "$collector" in
+        bash)
+            bash "${COLLECTOR_DIR}/thunderstorm-collector.sh" \
+                --server localhost --port "$STUB_PORT" \
+                --dir "$dir1" --dir "$dir2" \
+                --max-age 30 >/dev/null 2>&1 || true
+            ;;
+        python)
+            python3 "${COLLECTOR_DIR}/thunderstorm-collector.py" \
+                -s localhost -p "$STUB_PORT" \
+                -d "$dir1" "$dir2" \
+                --max-age 30 >/dev/null 2>&1 || true
+            ;;
+        perl)
+            # Perl may only accept a single --dir — test and see
+            perl "${COLLECTOR_DIR}/thunderstorm-collector.pl" \
+                -s localhost -p "$STUB_PORT" --dir "$dir1" --dir "$dir2" \
+                --max-age 30 >/dev/null 2>&1 || true
+            ;;
+    esac
+    sync_stub
+
+    local f1; f1="$(query_log "multi1-${collector}")"
+    local f2; f2="$(query_log "multi2-${collector}")"
+
+    if [ -n "$f1" ] && [ -n "$f2" ]; then
+        pass "$collector/multiple-dirs: both directories scanned"
+    elif [ -n "$f1" ] || [ -n "$f2" ]; then
+        # Collector only scanned one dir — might only support single dir
+        if [ -n "$f1" ]; then
+            skip "$collector/multiple-dirs: only first directory scanned (single-dir only?)"
+        else
+            skip "$collector/multiple-dirs: only second directory scanned"
+        fi
+    else
+        fail "$collector/multiple-dirs: neither directory scanned"
+    fi
+
+    rm -rf "$dir1" "$dir2"
+}
+
+# ── 7. 503 back-pressure with Retry-After ──────────────────────────────────
+test_503_backpressure() {
+    local collector="$1"
+    clear_log
+
+    local fixtures; fixtures="$(mktemp -d /tmp/oper-test-XXXXXX)"
+    echo "$MALICIOUS_CONTENT" > "$fixtures/bp503-${collector}.exe"
+    echo "$MALICIOUS_CONTENT" > "$fixtures/bp503b-${collector}.exe"
+
+    # Configure stub: first upload returns 503 with Retry-After: 1
+    # Only the first request gets 503; subsequent requests proceed normally
+    configure_stub '{
+        "upload_rules": [
+            {"match_count": [1], "status": 503, "headers": {"Retry-After": "1"}}
+        ]
+    }'
+
+    local output
+    local collector_exit=0
+    case "$collector" in
+        bash)
+            output="$(timeout 30 bash "${COLLECTOR_DIR}/thunderstorm-collector.sh" \
+                --server localhost --port "$STUB_PORT" --dir "$fixtures" --max-age 30 --retries 5 2>&1)" || collector_exit=$?
+            ;;
+        python)
+            output="$(timeout 30 python3 "${COLLECTOR_DIR}/thunderstorm-collector.py" \
+                -s localhost -p "$STUB_PORT" -d "$fixtures" --max-age 30 --retries 5 2>&1)" || collector_exit=$?
+            ;;
+        perl)
+            output="$(timeout 30 perl "${COLLECTOR_DIR}/thunderstorm-collector.pl" \
+                -s localhost -p "$STUB_PORT" --dir "$fixtures" --max-age 30 --retries 5 2>&1)" || collector_exit=$?
+            ;;
+        ps3)
+            output="$(timeout 30 pwsh -NoProfile -File "${COLLECTOR_DIR}/thunderstorm-collector.ps1" \
+                -ThunderstormServer localhost -ThunderstormPort "$STUB_PORT" -Folder "$fixtures" -MaxAge 30 2>&1)" || collector_exit=$?
+            ;;
+        ps2)
+            output="$(timeout 30 pwsh -NoProfile -File "${COLLECTOR_DIR}/thunderstorm-collector-ps2.ps1" \
+                -ThunderstormServer localhost -ThunderstormPort "$STUB_PORT" -Folder "$fixtures" -MaxAge 30 2>&1)" || collector_exit=$?
+            ;;
+    esac
+    sync_stub
+    sync_stub  # extra wait for retry
+
+    # Reset config
+    configure_stub '{"upload_rules": []}'
+
+    # Check that at least one file was eventually submitted
+    local entry; entry="$(query_log "bp503")"
+    if [ -n "$entry" ]; then
+        # Check if output mentions retry/503
+        if echo "$output" | grep -qi '503\|retry\|busy\|back.off\|Retry-After'; then
+            pass "$collector/503-backpressure: retried after 503, file submitted"
+        else
+            pass "$collector/503-backpressure: file submitted (retry may be silent)"
+        fi
+    else
+        if echo "$output" | grep -qi '503\|busy\|Service Unavailable'; then
+            fail "$collector/503-backpressure: got 503 but never retried successfully"
+        else
+            fail "$collector/503-backpressure: no evidence of 503 handling"
+        fi
+    fi
+
+    rm -rf "$fixtures"
+}
+
+# ── 8. Progress reporting ──────────────────────────────────────────────────
+test_progress_reporting() {
+    local collector="$1"
+
+    # PS collectors use -Progress (switch) — handled differently
+    local progress_flag
+    case "$collector" in
+        bash)   progress_flag="--progress" ;;
+        python) progress_flag="--progress" ;;
+        perl)   progress_flag="--progress" ;;
+        ps3)    progress_flag="-Progress" ;;
+        ps2)    progress_flag="-Progress" ;;
+    esac
+
+    clear_log
+
+    local fixtures; fixtures="$(mktemp -d /tmp/oper-test-XXXXXX)"
+    for i in $(seq 1 5); do
+        echo "$MALICIOUS_CONTENT" > "$fixtures/prog-${collector}-${i}.exe"
+    done
+
+    local output
+    case "$collector" in
+        bash)
+            output="$(timeout 30 bash "${COLLECTOR_DIR}/thunderstorm-collector.sh" \
+                --server localhost --port "$STUB_PORT" --dir "$fixtures" --max-age 30 --progress 2>&1)" || true
+            ;;
+        python)
+            output="$(timeout 30 python3 "${COLLECTOR_DIR}/thunderstorm-collector.py" \
+                -s localhost -p "$STUB_PORT" -d "$fixtures" --max-age 30 --progress 2>&1)" || true
+            ;;
+        perl)
+            output="$(timeout 30 perl "${COLLECTOR_DIR}/thunderstorm-collector.pl" \
+                -s localhost -p "$STUB_PORT" --dir "$fixtures" --max-age 30 --progress 2>&1)" || true
+            ;;
+        ps3)
+            output="$(timeout 30 pwsh -NoProfile -File "${COLLECTOR_DIR}/thunderstorm-collector.ps1" \
+                -ThunderstormServer localhost -ThunderstormPort "$STUB_PORT" -Folder "$fixtures" \
+                -MaxAge 30 -Progress 2>&1)" || true
+            ;;
+        ps2)
+            output="$(timeout 30 pwsh -NoProfile -File "${COLLECTOR_DIR}/thunderstorm-collector-ps2.ps1" \
+                -ThunderstormServer localhost -ThunderstormPort "$STUB_PORT" -Folder "$fixtures" \
+                -MaxAge 30 -Progress 2>&1)" || true
+            ;;
+    esac
+    sync_stub
+
+    # Check collector didn't crash and produced some output
+    local submitted; submitted="$(query_log "prog-${collector}")"
+    if [ -n "$submitted" ]; then
+        pass "$collector/progress: collector ran successfully with progress flag"
+    else
+        fail "$collector/progress: no files submitted with progress flag"
+    fi
+
+    rm -rf "$fixtures"
+}
+
+# ── 9. Syslog logging ─────────────────────────────────────────────────────
+test_syslog_logging() {
+    local collector="$1"
+
+    # Only bash supports --syslog
+    case "$collector" in
+        bash) ;;
+        *)
+            skip "$collector/syslog: not supported"
+            return
+            ;;
+    esac
+
+    clear_log
+
+    local fixtures; fixtures="$(mktemp -d /tmp/oper-test-XXXXXX)"
+    echo "$MALICIOUS_CONTENT" > "$fixtures/syslog-${collector}.exe"
+
+    # Run with --syslog — just verify it doesn't crash
+    local output; output="$(run_bash "$fixtures" --max-age 30 --syslog 2>&1)" || true
+    sync_stub
+
+    local entry; entry="$(query_log "syslog-${collector}")"
+    if [ -n "$entry" ]; then
+        pass "$collector/syslog: collector ran successfully with --syslog"
+    else
+        # Even if upload fails, the collector shouldn't crash with --syslog
+        if echo "$output" | grep -qi 'error\|crash\|abort'; then
+            fail "$collector/syslog: collector crashed with --syslog"
+        else
+            pass "$collector/syslog: collector ran with --syslog (no crash)"
+        fi
+    fi
+
+    rm -rf "$fixtures"
+}
+
+# ── 10. curl vs wget fallback (bash only) ──────────────────────────────────
+test_wget_fallback() {
+    local collector="$1"
+
+    case "$collector" in
+        bash) ;;
+        *)
+            skip "$collector/wget-fallback: bash only"
+            return
+            ;;
+    esac
+
+    # Check if wget is available
+    if ! command -v wget >/dev/null 2>&1; then
+        skip "$collector/wget-fallback: wget not installed"
+        return
+    fi
+
+    clear_log
+
+    local fixtures; fixtures="$(mktemp -d /tmp/oper-test-XXXXXX)"
+    echo "$MALICIOUS_CONTENT" > "$fixtures/wget-${collector}.exe"
+
+    # Build a PATH that excludes directories containing real curl, but includes wget
+    local wget_path; wget_path="$(command -v wget 2>/dev/null)"
+    if [ -z "$wget_path" ]; then
+        skip "$collector/wget-fallback: wget not installed"
+        rm -rf "$fixtures"
+        return
+    fi
+
+    local wget_dir; wget_dir="$(dirname "$wget_path")"
+    # Build a minimal PATH with only wget's directory and standard utils (but no curl)
+    local clean_path="$wget_dir:/usr/sbin:/sbin"
+    # Verify curl is NOT on this path
+    if env PATH="$clean_path" command -v curl >/dev/null 2>&1; then
+        # curl is in the same dir as wget — can't isolate
+        skip "$collector/wget-fallback: curl and wget in same directory, cannot isolate"
+        rm -rf "$fixtures"
+        return
+    fi
+
+    local output
+    output="$(timeout 30 env PATH="$clean_path" \
+        bash "${COLLECTOR_DIR}/thunderstorm-collector.sh" \
+        --server localhost --port "$STUB_PORT" --dir "$fixtures" \
+        --max-age 30 2>&1)" || true
+    sync_stub
+
+    local entry; entry="$(query_log "wget-${collector}")"
+    if [ -n "$entry" ]; then
+        pass "$collector/wget-fallback: file submitted via wget"
+    else
+        if echo "$output" | grep -qi 'wget'; then
+            fail "$collector/wget-fallback: detected wget but upload failed"
+        else
+            skip "$collector/wget-fallback: could not isolate wget from curl"
+        fi
+    fi
+
+    rm -rf "$fixtures"
+}
+
+# ============================================================================
+# Main
+# ============================================================================
+
+# Entry point: start the stub, run every operational test against every
+# collector, stop the stub, then print the summary. Exits 0 only when no test
+# failed.
+echo ""
+printf "${BOLD}Operational Feature Tests${RESET}\n"
+echo "============================================"
+echo ""
+
+start_stub
+
+COLLECTORS=("bash" "python" "perl" "ps3" "ps2")
+
+for collector in "${COLLECTORS[@]}"; do
+    # Pass the name as an argument so it is never parsed as a format string
+    printf "\n${CYAN}── %s ──${RESET}\n" "$collector"
+
+    test_collection_markers "$collector"
+    test_interrupted_marker "$collector"
+    test_dry_run "$collector"
+    test_source_identifier "$collector"
+    test_sync_mode "$collector"
+    test_multiple_dirs "$collector"
+    test_503_backpressure "$collector"
+    test_progress_reporting "$collector"
+    test_syslog_logging "$collector"
+    test_wget_fallback "$collector"
+done
+
+stop_stub
+
+echo ""
+echo "============================================"
+printf " Results: ${GREEN}%d passed${RESET}, ${RED}%d failed${RESET}, ${YELLOW}%d skipped${RESET}\n" \
+    "$TESTS_PASSED" "$TESTS_FAILED" "$TESTS_SKIPPED"
+echo "============================================"
+
+if [ -n "$FAILED_NAMES" ]; then
+    echo ""
+    printf "${RED}Failed tests:${RESET}\n"
+    # %b still expands backslash escapes such as \n, but a literal '%' in a
+    # test description can no longer be misread as a conversion specifier.
+    printf '%b' "$FAILED_NAMES"
+fi
+
+echo ""
+[ "$TESTS_FAILED" -eq 0 ] && exit 0 || exit 1
diff --git a/scripts/tests/run_tests.sh b/scripts/tests/run_tests.sh
new file mode 100755
index 0000000..66c6fab
--- /dev/null
+++ b/scripts/tests/run_tests.sh
@@ -0,0 +1,805 @@
+#!/usr/bin/env bash
+#
+# Test suite for the bash collector.
+#
+# Modes:
+#   1. Stub server (CI/GitHub Actions):
+#      Provide a thunderstorm-stub-server binary. Tests start/stop it automatically.
+#      ./scripts/tests/run_tests.sh [path/to/thunderstorm-stub-server]
+#
+#   2. External server (real Thunderstorm or already-running stub):
+#      Set THUNDERSTORM_TEST_SERVER and THUNDERSTORM_TEST_PORT.
+#      Skips tests that require stub-side verification (audit log, uploads dir).
+#      THUNDERSTORM_TEST_SERVER=10.0.0.5 THUNDERSTORM_TEST_PORT=8081 ./scripts/tests/run_tests.sh
+#
+# Environment variables:
+#   STUB_SERVER_BIN          Path to thunderstorm-stub-server binary
+#   THUNDERSTORM_TEST_SERVER External server host (skips stub lifecycle)
+#   THUNDERSTORM_TEST_PORT   External server port (default: 8080)
+#   TEST_FILTER              Run only tests matching this grep pattern
+#
+# Stub binary lookup order (when no external server):
+#   1. First CLI argument
+#   2. $STUB_SERVER_BIN
+#   3. ../thunderstorm-stub-server/thunderstorm-stub-server (sibling checkout)
+#   4. thunderstorm-stub-server in $PATH
+
+set -euo pipefail
+
+TESTS_DIR="$(cd "$(dirname "$0")" && pwd)"
+REPO_ROOT="$(cd "$TESTS_DIR/../.." && pwd)"
+COLLECTOR="$REPO_ROOT/scripts/thunderstorm-collector.sh"
+
+# ── Locate stub server ────────────────────────────────────────────────────────
+
+find_stub_server() {
+    if [ -n "${1:-}" ] && [ -x "$1" ]; then
+        echo "$1"; return 0
+    fi
+    if [ -n "${STUB_SERVER_BIN:-}" ] && [ -x "$STUB_SERVER_BIN" ]; then
+        echo "$STUB_SERVER_BIN"; return 0
+    fi
+    local sibling="$REPO_ROOT/../thunderstorm-stub-server/thunderstorm-stub-server"
+    if [ -x "$sibling" ]; then
+        echo "$sibling"; return 0
+    fi
+    if command -v thunderstorm-stub-server >/dev/null 2>&1; then
+        command -v thunderstorm-stub-server; return 0
+    fi
+    return 1
+}
+
+# ── Mode selection ─────────────────────────────────────────────────────────────
+
+# External mode is selected by setting THUNDERSTORM_TEST_SERVER; otherwise a
+# local stub binary must be found, or the script exits with status 1.
+EXTERNAL_SERVER="${THUNDERSTORM_TEST_SERVER:-}"
+EXTERNAL_PORT="${THUNDERSTORM_TEST_PORT:-8080}"
+USE_EXTERNAL=0
+STUB_BIN=""
+
+if [ -z "$EXTERNAL_SERVER" ]; then
+    if ! STUB_BIN="$(find_stub_server "${1:-}")"; then
+        echo "ERROR: thunderstorm-stub-server binary not found." >&2
+        echo "Build it: cd ../thunderstorm-stub-server && go build -o thunderstorm-stub-server ." >&2
+        echo "Or set THUNDERSTORM_TEST_SERVER to use an external server." >&2
+        exit 1
+    fi
+else
+    USE_EXTERNAL=1
+fi
+
+# ── Test infrastructure ──────────────────────────────────────────────────────
+
+# Stub process state (filled in by start_stub)
+STUB_PID=""
+STUB_PORT=0
+# Scratch locations (filled in by setup_tmp)
+TEST_TMP=""
+UPLOADS_DIR=""
+AUDIT_LOG=""
+STUB_LOG=""
+# Result counters (updated by run_test)
+TESTS_RUN=0
+TESTS_PASSED=0
+TESTS_FAILED=0
+FAILED_NAMES=""
+
+# Colours (disabled if not a terminal)
+if [ ! -t 1 ]; then
+    GREEN=''
+    RED=''
+    YELLOW=''
+    BOLD=''
+    RESET=''
+else
+    GREEN='\033[0;32m'
+    RED='\033[0;31m'
+    YELLOW='\033[0;33m'
+    BOLD='\033[1m'
+    RESET='\033[0m'
+fi
+
+# Create a fresh scratch directory and point TEST_TMP, UPLOADS_DIR, AUDIT_LOG
+# and STUB_LOG at locations inside it. Only the uploads directory is created;
+# the two log paths are just names at this point.
+setup_tmp() {
+    TEST_TMP="$(mktemp -d)"
+    STUB_LOG="${TEST_TMP}/stub.log"
+    AUDIT_LOG="${TEST_TMP}/audit.jsonl"
+    UPLOADS_DIR="${TEST_TMP}/uploads"
+    mkdir -p "${UPLOADS_DIR}"
+}
+
+cleanup() {
+    stop_stub
+    if [ -n "$TEST_TMP" ] && [ -d "$TEST_TMP" ]; then
+        rm -rf "$TEST_TMP"
+    fi
+}
+trap cleanup EXIT INT TERM
+
+# Pick an available port
+pick_port() {
+    local port
+    if command -v python3 >/dev/null 2>&1; then
+        port="$(python3 -c 'import socket; s=socket.socket(); s.bind(("",0)); print(s.getsockname()[1]); s.close()' 2>/dev/null || true)"
+        if [ -n "$port" ] && [ "$port" -ge 1 ] 2>/dev/null; then
+            echo "$port"
+            return 0
+        fi
+    fi
+    if command -v shuf >/dev/null 2>&1; then
+        shuf -i 10000-60000 -n 1
+    else
+        echo $(( RANDOM % 50000 + 10000 ))
+    fi
+}
+
+start_stub() {
+    if [ "$USE_EXTERNAL" -eq 1 ]; then
+        STUB_PORT="$EXTERNAL_PORT"
+        return 0
+    fi
+    STUB_PORT="$(pick_port)"
+    # Clean state for each test
+    rm -rf "$UPLOADS_DIR"/* "$AUDIT_LOG" 2>/dev/null || true
+    "$STUB_BIN" \
+        --port "$STUB_PORT" \
+        --uploads-dir "$UPLOADS_DIR" \
+        --log-file "$AUDIT_LOG" \
+        >"$STUB_LOG" 2>&1 &
+    STUB_PID=$!
+    # Wait for server readiness
+    local i
+    for i in $(seq 1 30); do
+        if curl -fsS "http://127.0.0.1:$STUB_PORT/api/status" >/dev/null 2>&1; then
+            return 0
+        fi
+        sleep 0.2
+    done
+    echo "ERROR: Stub server did not start on port $STUB_PORT" >&2
+    cat "$STUB_LOG" >&2
+    return 1
+}
+
+# Terminate the stub server started by start_stub and reap it.
+# Does nothing in external-server mode or when no stub is recorded as running.
+stop_stub() {
+    if [ "$USE_EXTERNAL" -eq 1 ] || [ -z "$STUB_PID" ]; then
+        return 0
+    fi
+    # Both steps are best-effort: the process may already be gone
+    kill "$STUB_PID" 2>/dev/null || true
+    wait "$STUB_PID" 2>/dev/null || true
+    STUB_PID=""
+}
+
+# Stop the current stub (if any) and start a new one; the return status is
+# that of start_stub.
+restart_stub() {
+    stop_stub
+    start_stub
+}
+
+# Whether stub-side verification (audit log, uploads dir) is available
+# Succeeds only when the tests run against the locally started stub, i.e. when
+# the audit log and uploads directory can be inspected.
+has_stub_verification() {
+    test "$USE_EXTERNAL" -eq 0
+}
+
+# The server address used by the collector
+# Print the host the collector should talk to: the external server when one is
+# configured, otherwise the loopback address of the local stub.
+server_host() {
+    local host="127.0.0.1"
+    if [ "$USE_EXTERNAL" -eq 1 ]; then
+        host="$EXTERNAL_SERVER"
+    fi
+    echo "$host"
+}
+
+# Run collector with standard flags, additional args appended
+run_collector() {
+    bash "$COLLECTOR" \
+        --server "$(server_host)" \
+        --port "$STUB_PORT" \
+        --no-log-file \
+        "$@" 2>&1
+}
+
+# Get scanned_samples from stub /api/status
+# Print the server's scanned_samples counter from /api/status, or 0 when the
+# request or the JSON parsing fails.
+# The request goes to the same host the collector uploads to, so the value is
+# also meaningful in external-server mode instead of always falling back to 0.
+stub_scanned() {
+    curl -fsS "http://$(server_host):$STUB_PORT/api/status" 2>/dev/null \
+        | python3 -c "import sys,json; print(json.load(sys.stdin)['scanned_samples'])" 2>/dev/null || echo 0
+}
+
+# Count files in uploads dir
+# Print how many regular files currently exist below the uploads directory.
+upload_count() {
+    # awk prints the bare number, dropping the padding some wc versions add
+    find "$UPLOADS_DIR" -type f 2>/dev/null | wc -l | awk '{print $1}'
+}
+
+# Extract stat from collector output: "scanned=4 submitted=3 ..."
+parse_collector_stat() {
+    local output="$1" key="$2"
+    echo "$output" | grep -oE "${key}=[0-9]+" | tail -1 | cut -d= -f2
+}
+
+# ── Test result helpers ──────────────────────────────────────────────────────
+
+# Compare two strings; on a mismatch print a FAIL line and return 1.
+#   $1  label shown in the failure message
+#   $2  expected value
+#   $3  actual value
+assert_eq() {
+    local label="$1"
+    local expected="$2"
+    local actual="$3"
+    if [ "$expected" = "$actual" ]; then
+        return 0
+    fi
+    printf "    ${RED}FAIL${RESET}: %s — expected '%s', got '%s'\n" "$label" "$expected" "$actual"
+    return 1
+}
+
+assert_ge() {
+    local label="$1" min="$2" actual="$3"
+    if [ "$actual" -lt "$min" ] 2>/dev/null; then
+        printf "    ${RED}FAIL${RESET}: %s — expected >= %s, got '%s'\n" "$label" "$min" "$actual"
+        return 1
+    fi
+    return 0
+}
+
+assert_contains() {
+    local label="$1" needle="$2" haystack="$3"
+    if ! echo "$haystack" | grep -qF -- "$needle"; then
+        printf "    ${RED}FAIL${RESET}: %s — output does not contain '%s'\n" "$label" "$needle"
+        return 1
+    fi
+    return 0
+}
+
+assert_not_contains() {
+    local label="$1" needle="$2" haystack="$3"
+    if echo "$haystack" | grep -qF -- "$needle"; then
+        printf "    ${RED}FAIL${RESET}: %s — output unexpectedly contains '%s'\n" "$label" "$needle"
+        return 1
+    fi
+    return 0
+}
+
+run_test() {
+    local name="$1"
+    # Filter support
+    if [ -n "${TEST_FILTER:-}" ] && ! echo "$name" | grep -q "$TEST_FILTER"; then
+        return 0
+    fi
+    TESTS_RUN=$((TESTS_RUN + 1))
+    printf "  ${BOLD}%-55s${RESET}" "$name"
+    if "$name"; then
+        printf " ${GREEN}PASS${RESET}\n"
+        TESTS_PASSED=$((TESTS_PASSED + 1))
+    else
+        printf " ${RED}FAIL${RESET}\n"
+        TESTS_FAILED=$((TESTS_FAILED + 1))
+        FAILED_NAMES="$FAILED_NAMES  - $name\n"
+    fi
+}
+
+# ── Test fixtures ────────────────────────────────────────────────────────────
+
+create_sample_dir() {
+    local dir="$TEST_TMP/samples/$1"
+    mkdir -p "$dir"
+    echo "$dir"
+}
+
+# Create a file, including any missing parent directories.
+#   $1  path of the file
+#   $2  optional content, written verbatim without a trailing newline;
+#       when omitted the file gets "sample content <basename>" plus a newline
+create_file() {
+    local target="$1"
+    mkdir -p "$(dirname "$target")"
+    if [ $# -ge 2 ]; then
+        printf '%s' "$2" > "$target"
+        return
+    fi
+    printf 'sample content %s\n' "$(basename "$target")" > "$target"
+}
+
+# Create a file of exactly <size> random bytes, including any missing parent
+# directories.
+#   $1  path of the file
+#   $2  size in bytes
+create_file_bytes() {
+    local path="$1" size="$2"
+    mkdir -p "$(dirname "$path")"
+    # head -c copies the bytes in bulk; `dd bs=1` issued one read/write pair
+    # per byte.
+    head -c "$size" /dev/urandom > "$path" 2>/dev/null
+}
+
+set_file_age_days() {
+    local path="$1" days="$2"
+    local ts
+    if date --version >/dev/null 2>&1; then
+        # GNU date
+        ts="$(date -d "$days days ago" +%Y%m%d%H%M.%S)"
+    else
+        # BSD date
+        ts="$(date -v-${days}d +%Y%m%d%H%M.%S)"
+    fi
+    touch -t "$ts" "$path"
+}
+
+# ══════════════════════════════════════════════════════════════════════════════
+# TESTS
+# ══════════════════════════════════════════════════════════════════════════════
+
+# ── 1. Basic upload (async) ──────────────────────────────────────────────────
+
+test_basic_async_upload() {
+    restart_stub
+    local d; d="$(create_sample_dir basic_async)"
+    create_file "$d/a.txt"
+    create_file "$d/b.bin"
+    create_file "$d/c.dat"
+
+    local out; out="$(run_collector --dir "$d" --source basic-async --max-age 30)"
+    local submitted; submitted="$(parse_collector_stat "$out" submitted)"
+    local failed; failed="$(parse_collector_stat "$out" failed)"
+
+    assert_eq "submitted" "3" "$submitted" || return 1
+    assert_eq "failed" "0" "$failed" || return 1
+    # Wait briefly for async processing, then check server
+    sleep 0.5
+    assert_ge "stub scanned" 3 "$(stub_scanned)" || return 1
+}
+
+# ── 2. Basic upload (sync) ──────────────────────────────────────────────────
+
+# A single file uploaded with --sync must be reported as submitted and must
+# appear in the stub's uploads directory. Stub mode only.
+test_basic_sync_upload() {
+    if ! has_stub_verification; then
+        echo "    (skipped: sync scan too slow on external server)"
+        return 0
+    fi
+    restart_stub
+    local sample_dir
+    sample_dir="$(create_sample_dir basic_sync)"
+    create_file "$sample_dir/sample.bin"
+
+    local report
+    report="$(run_collector --dir "$sample_dir" --sync --source sync-test --max-age 30)"
+    local n_submitted
+    n_submitted="$(parse_collector_stat "$report" submitted)"
+
+    assert_eq "submitted" "1" "$n_submitted" || return 1
+    assert_eq "upload_count" "1" "$(upload_count)" || return 1
+}
+
+# ── 3. Dry-run: no uploads ──────────────────────────────────────────────────
+
+test_dry_run_no_uploads() {
+    restart_stub
+    local d; d="$(create_sample_dir dry_run)"
+    create_file "$d/a.txt"
+    create_file "$d/b.txt"
+
+    local out; out="$(run_collector --dir "$d" --dry-run --max-age 30)"
+    local submitted; submitted="$(parse_collector_stat "$out" submitted)"
+
+    assert_eq "submitted" "2" "$submitted" || return 1
+    if has_stub_verification; then
+        assert_eq "upload_count" "0" "$(upload_count)" || return 1
+        assert_eq "stub_scanned" "0" "$(stub_scanned)" || return 1
+    fi
+}
+
+# ── 4. Max file size filter ─────────────────────────────────────────────────
+
+# With --max-size-kb 50 a 5-byte file must be submitted and a 60000-byte file
+# must be counted as skipped.
+test_max_file_size_filter() {
+    restart_stub
+    local sample_dir
+    sample_dir="$(create_sample_dir size_filter)"
+    create_file "$sample_dir/small.bin" "small"           # ~5 bytes
+    create_file_bytes "$sample_dir/big.bin" 60000         # ~59 KB
+
+    # Set max size to 50 KB
+    local report
+    report="$(run_collector --dir "$sample_dir" --max-size-kb 50 --max-age 30 --debug)"
+    local n_submitted n_skipped
+    n_submitted="$(parse_collector_stat "$report" submitted)"
+    n_skipped="$(parse_collector_stat "$report" skipped)"
+
+    assert_eq "submitted" "1" "$n_submitted" || return 1
+    assert_eq "skipped" "1" "$n_skipped" || return 1
+}
+
+# ── 5. Max age filter ───────────────────────────────────────────────────────
+
+# With --max-age 30 a file backdated by 60 days must be neither scanned nor
+# submitted; only the fresh file counts.
+test_max_age_filter() {
+    restart_stub
+    local sample_dir
+    sample_dir="$(create_sample_dir age_filter)"
+    create_file "$sample_dir/recent.txt" "new"
+    create_file "$sample_dir/old.txt" "old"
+    set_file_age_days "$sample_dir/old.txt" 60
+
+    local report
+    report="$(run_collector --dir "$sample_dir" --max-age 30)"
+    local n_scanned n_submitted
+    n_scanned="$(parse_collector_stat "$report" scanned)"
+    n_submitted="$(parse_collector_stat "$report" submitted)"
+
+    # find -mtime -30 should exclude the 60-day-old file entirely
+    assert_eq "scanned" "1" "$n_scanned" || return 1
+    assert_eq "submitted" "1" "$n_submitted" || return 1
+}
+
+# ── 6. Multiple directories ─────────────────────────────────────────────────
+
+# Two --dir arguments holding one and two files respectively must yield three
+# submitted files in total.
+test_multiple_directories() {
+    restart_stub
+    local first_dir second_dir
+    first_dir="$(create_sample_dir multi_a)"
+    second_dir="$(create_sample_dir multi_b)"
+    create_file "$first_dir/x.txt"
+    create_file "$second_dir/y.txt"
+    create_file "$second_dir/z.txt"
+
+    local report
+    report="$(run_collector --dir "$first_dir" --dir "$second_dir" --max-age 30)"
+    local n_submitted
+    n_submitted="$(parse_collector_stat "$report" submitted)"
+
+    assert_eq "submitted" "3" "$n_submitted" || return 1
+}
+
+# ── 7. Non-existent directory warning ────────────────────────────────────────
+
+# A missing --dir must produce a warning containing "non-directory" without
+# stopping the run: the file in the existing directory is still submitted.
+test_nonexistent_directory_warning() {
+    restart_stub
+    local sample_dir
+    sample_dir="$(create_sample_dir exists)"
+    create_file "$sample_dir/a.txt"
+
+    # Also pass a non-existent dir — collector should warn but continue
+    local report
+    report="$(run_collector --dir /nonexistent_path_$RANDOM --dir "$sample_dir" --max-age 30)"
+
+    assert_contains "warn about missing dir" "non-directory" "$report" || return 1
+    local n_submitted
+    n_submitted="$(parse_collector_stat "$report" submitted)"
+    assert_eq "submitted" "1" "$n_submitted" || return 1
+}
+
+# ── 8. Source parameter arrives at server ────────────────────────────────────
+
+# The value given with --source must show up in the stub's JSONL audit log.
+# Stub mode only.
+test_source_parameter_received() {
+    if ! has_stub_verification; then
+        echo "    (skipped: needs stub server)"
+        return 0
+    fi
+    restart_stub
+    local sample_dir
+    sample_dir="$(create_sample_dir source_test)"
+    create_file "$sample_dir/s.bin"
+
+    run_collector --dir "$sample_dir" --source "my-test-source" --sync --max-age 30 >/dev/null
+    sleep 0.3
+
+    # Check the JSONL audit log for the source
+    local audit_text
+    audit_text="$(cat "$AUDIT_LOG" 2>/dev/null)"
+    assert_contains "source in audit log" "my-test-source" "$audit_text" || return 1
+}
+
+# ── 9. File content integrity ────────────────────────────────────────────────
+
+test_file_content_integrity() {
+    has_stub_verification || { echo "    (skipped: needs stub server)"; return 0; }
+    restart_stub
+    local d; d="$(create_sample_dir integrity)"
+    local content="THUNDERSTORM_INTEGRITY_TEST_$(date +%s)"
+    create_file "$d/check.bin" "$content"
+    local expected_sha; expected_sha="$(sha256sum "$d/check.bin" | awk '{print $1}')"
+
+    run_collector --dir "$d" --sync --max-age 30 >/dev/null
+    sleep 0.3
+
+    # Verify the uploaded file has the same hash
+    local uploaded_file
+    uploaded_file="$(find "$UPLOADS_DIR" -type f | head -1)"
+    [ -n "$uploaded_file" ] || { printf "    ${RED}FAIL${RESET}: no uploaded file found\n"; return 1; }
+    local actual_sha; actual_sha="$(sha256sum "$uploaded_file" | awk '{print $1}')"
+    assert_eq "sha256" "$expected_sha" "$actual_sha" || return 1
+}
+
+# ── 10. Filename with spaces ────────────────────────────────────────────────
+
+# A file whose name contains spaces must be submitted without a failure.
+test_filename_with_spaces() {
+    restart_stub
+    local sample_dir
+    sample_dir="$(create_sample_dir spaces)"
+    create_file "$sample_dir/my important file.txt" "spaces test"
+
+    local report
+    report="$(run_collector --dir "$sample_dir" --max-age 30)"
+    local n_submitted n_failed
+    n_submitted="$(parse_collector_stat "$report" submitted)"
+    n_failed="$(parse_collector_stat "$report" failed)"
+
+    assert_eq "submitted" "1" "$n_submitted" || return 1
+    assert_eq "failed" "0" "$n_failed" || return 1
+}
+
+# ── 11. Filename with special characters ────────────────────────────────────
+
+test_filename_special_chars() {
+    # Uploads files whose names contain parentheses, single quotes and
+    # ampersands, plus one plain baseline name, and expects all four to be
+    # submitted with zero failures.
+    restart_stub
+
+    local sample_dir report n_submitted n_failed
+    sample_dir="$(create_sample_dir special)"
+
+    # Filenames that stress multipart encoding
+    create_file "$sample_dir/file with (parens).txt" "parens"
+    create_file "$sample_dir/file'with'quotes.txt" "quotes"
+    create_file "$sample_dir/file&with&amps.bin" "amps"
+    # Semicolons and double-quotes are sanitized by the collector, so they are
+    # not exercised here; this last file is the plain baseline.
+    create_file "$sample_dir/normal.txt" "baseline"
+
+    report="$(run_collector --dir "$sample_dir" --max-age 30)"
+    n_submitted="$(parse_collector_stat "$report" submitted)"
+    n_failed="$(parse_collector_stat "$report" failed)"
+
+    assert_eq "submitted" "4" "$n_submitted" || return 1
+    assert_eq "failed" "0" "$n_failed" || return 1
+}
+
+# ── 12. Empty directory ─────────────────────────────────────────────────────
+
+test_empty_directory() {
+    # Scanning a directory without files must report 0 scanned and 0 submitted.
+    restart_stub
+
+    local sample_dir report n_scanned n_submitted
+    sample_dir="$(create_sample_dir empty)"
+
+    report="$(run_collector --dir "$sample_dir" --max-age 30)"
+    n_scanned="$(parse_collector_stat "$report" scanned)"
+    n_submitted="$(parse_collector_stat "$report" submitted)"
+
+    assert_eq "scanned" "0" "$n_scanned" || return 1
+    assert_eq "submitted" "0" "$n_submitted" || return 1
+}
+
+# ── 13. Nested directories ──────────────────────────────────────────────────
+
+test_nested_directories() {
+    restart_stub
+    local d; d="$(create_sample_dir nested)"
+    create_file "$d/top.txt"
+    create_file "$d/a/mid.txt"
+    create_file "$d/a/b/deep.txt"
+    create_file "$d/a/b/c/deeper.txt"
+
+    local out; out="$(run_collector --dir "$d" --max-age 30)"
+    local submitted; submitted="$(parse_collector_stat "$out" submitted)"
+
+    assert_eq "submitted" "4" "$submitted" || return 1
+}
+
+# ── 14. Symlinks are not followed ───────────────────────────────────────────
+
+test_symlinks_not_followed() {
+    restart_stub
+    local d; d="$(create_sample_dir symlinks)"
+    local other; other="$(create_sample_dir symlink_target)"
+    create_file "$d/real.txt"
+    create_file "$other/secret.txt"
+    ln -sf "$other" "$d/link_to_other" 2>/dev/null || {
+        # Skip on systems that don't support symlinks in temp
+        return 0
+    }
+
+    local out; out="$(run_collector --dir "$d" --max-age 30)"
+    local submitted; submitted="$(parse_collector_stat "$out" submitted)"
+
+    # find -type f only returns regular files, not symlink targets
+    # But find does follow symlinked directories by default on some systems.
+    # The key thing: real.txt should always be submitted.
+    assert_ge "submitted at least real.txt" 1 "$submitted" || return 1
+}
+
+# ── 15. Validation: invalid port ────────────────────────────────────────────
+
+test_invalid_port_rejected() {
+    # A non-numeric --port value must produce the "Port must be numeric" error.
+    local output
+    # The collector's exit status is ignored; only its output is inspected.
+    output="$(bash "$COLLECTOR" --server 127.0.0.1 --port "notaport" \
+        --no-log-file --dir /tmp --max-age 30 2>&1)" || true
+
+    assert_contains "port validation" "Port must be numeric" "$output" || return 1
+}
+
+# ── 16. Validation: invalid max-age ─────────────────────────────────────────
+
+test_invalid_max_age_rejected() {
+    # A non-numeric --max-age value must produce the "max-age must be numeric" error.
+    local output
+    # The collector's exit status is ignored; only its output is inspected.
+    output="$(bash "$COLLECTOR" --server 127.0.0.1 --port 8080 \
+        --no-log-file --dir /tmp --max-age "abc" 2>&1)" || true
+
+    assert_contains "max-age validation" "max-age must be numeric" "$output" || return 1
+}
+
+# ── 17. Validation: invalid max-size-kb ──────────────────────────────────────
+
+test_invalid_max_size_rejected() {
+    # A non-numeric --max-size-kb value must produce the
+    # "max-size-kb must be numeric" error.
+    local output
+    # The collector's exit status is ignored; only its output is inspected.
+    output="$(bash "$COLLECTOR" --server 127.0.0.1 --port 8080 \
+        --no-log-file --dir /tmp --max-size-kb "xyz" 2>&1)" || true
+
+    assert_contains "max-size validation" "max-size-kb must be numeric" "$output" || return 1
+}
+
+# ── 18. Validation: missing server ───────────────────────────────────────────
+
+test_missing_server_rejected() {
+    # Passing an empty string to --server must be rejected.
+    local output
+    # The collector's exit status is ignored; only its output is inspected.
+    output="$(bash "$COLLECTOR" --server "" --port 8080 \
+        --no-log-file --dir /tmp 2>&1)" || true
+
+    # Empty string is caught as "Missing value" by the arg parser
+    assert_contains "server validation" "Missing value" "$output" || return 1
+}
+
+# ── 19. Unknown option rejected ──────────────────────────────────────────────
+
+test_unknown_option_rejected() {
+    # An unrecognised flag must produce an "Unknown option" message.
+    local output
+    # The collector's exit status is ignored; only its output is inspected.
+    output="$(bash "$COLLECTOR" --server 127.0.0.1 --port 8080 \
+        --no-log-file --dir /tmp --bogus-flag 2>&1)" || true
+
+    assert_contains "unknown option" "Unknown option" "$output" || return 1
+}
+
+# ── 20. Help flag ────────────────────────────────────────────────────────────
+
+test_help_flag() {
+    # --help output must contain the usage line, the --server option and the
+    # examples section.
+    local help_text
+    help_text="$(bash "$COLLECTOR" --help 2>&1)"
+
+    assert_contains "help shows usage" "Usage:" "$help_text" || return 1
+    assert_contains "help shows options" "--server" "$help_text" || return 1
+    assert_contains "help shows examples" "Examples:" "$help_text" || return 1
+}
+
+# ── 21. Log file is written ─────────────────────────────────────────────────
+
+test_log_file_written() {
+    # Runs the collector with --log-file and checks that the log file is created
+    # and contains both the collector banner text and the completion message.
+    restart_stub
+    local d; d="$(create_sample_dir log_file)"
+    create_file "$d/a.txt"
+    local log_path="$TEST_TMP/collector-test.log"
+    # Start from a clean slate so the assertions only see this run's output.
+    rm -f "$log_path"
+
+    # Redirection order matters: ">/dev/null 2>&1" silences both streams, whereas
+    # "2>&1 >/dev/null" would leave stderr attached to the test output.
+    bash "$COLLECTOR" \
+        --server "$(server_host)" --port "$STUB_PORT" \
+        --dir "$d" --max-age 30 --source log-test \
+        --log-file "$log_path" --quiet >/dev/null 2>&1
+
+    [ -f "$log_path" ] || { printf "    ${RED}FAIL${RESET}: log file not created\n"; return 1; }
+    # Read the log once and reuse it for both assertions.
+    local log_text; log_text="$(cat "$log_path")"
+    assert_contains "log has collector info" "Thunderstorm Collector" "$log_text" || return 1
+    assert_contains "log has completion" "Run completed" "$log_text" || return 1
+}
+
+# ── 22. Source URL-encoding ──────────────────────────────────────────────────
+
+test_source_url_encoding() {
+    has_stub_verification || { echo "    (skipped: needs stub server)"; return 0; }
+    restart_stub
+    local d; d="$(create_sample_dir urlenc)"
+    create_file "$d/a.bin"
+
+    run_collector --dir "$d" --source "host with spaces" --sync --max-age 30 >/dev/null
+    sleep 0.3
+
+    # The source should arrive at the server (URL-decoded)
+    assert_contains "source in audit" "host with spaces" "$(cat "$AUDIT_LOG" 2>/dev/null)" || return 1
+}
+
+# ── 23. Retries on server down ───────────────────────────────────────────────
+
+test_retries_on_connection_failure() {
+    # Runs the collector against a port obtained from pick_port while the stub
+    # is stopped, and expects the single file to be counted as failed with
+    # "attempt" (retry) messages in the output.
+    # Don't start stub — let it fail
+    stop_stub
+    local d; d="$(create_sample_dir retry_fail)"
+    create_file "$d/a.txt"
+
+    local dead_port; dead_port="$(pick_port)"
+    # The collector may exit non-zero when every upload fails.  As in the
+    # validation tests, ignore its status so the assertions below always run.
+    local out; out="$(bash "$COLLECTOR" \
+        --server 127.0.0.1 --port "$dead_port" --no-log-file \
+        --dir "$d" --max-age 30 --retries 2 2>&1)" || true
+
+    local failed; failed="$(parse_collector_stat "$out" failed)"
+    assert_eq "failed" "1" "$failed" || return 1
+    assert_contains "retry message" "attempt" "$out" || return 1
+}
+
+# ── 24. Full path as multipart filename ──────────────────────────────────────
+
+test_full_path_sent_as_filename() {
+    # Submits a single file and checks the submitted/failed counters.
+    # NOTE(review): despite the name, nothing here inspects the filename the
+    # server received; only the counters are asserted.
+    restart_stub
+
+    local sample_dir report n_submitted n_failed
+    sample_dir="$(create_sample_dir fullpath)"
+    create_file "$sample_dir/sample.bin" "path test"
+
+    report="$(run_collector --dir "$sample_dir" --max-age 30)"
+    n_submitted="$(parse_collector_stat "$report" submitted)"
+    n_failed="$(parse_collector_stat "$report" failed)"
+
+    assert_eq "submitted" "1" "$n_submitted" || return 1
+    assert_eq "failed" "0" "$n_failed" || return 1
+}
+
+# ── 25. Zero-byte file ──────────────────────────────────────────────────────
+
+test_zero_byte_file() {
+    # An empty file must at least be attempted: it has to show up in either the
+    # submitted or the failed counter.
+    restart_stub
+
+    local sample_dir report submitted failed
+    sample_dir="$(create_sample_dir zerobyte)"
+    : > "$sample_dir/empty.bin"
+
+    report="$(run_collector --dir "$sample_dir" --max-age 30)"
+    submitted="$(parse_collector_stat "$report" submitted)"
+    failed="$(parse_collector_stat "$report" failed)"
+
+    # Zero-byte file: size 0 KB, should pass size filter (it's under any limit)
+    # and be submitted (the server may or may not accept it — that's server-side)
+    local attempted=$((submitted + failed))
+    assert_ge "submitted or failed" 1 "$attempted" || return 1
+}
+
+# ── 26. Max-age 0 includes all files ────────────────────────────────────────
+
+test_max_age_zero_includes_all() {
+    # Runs the collector with --max-age 0 over one fresh file and one file aged
+    # 365 days, and only requires that at least one file is scanned.
+    restart_stub
+
+    local sample_dir report n_scanned
+    sample_dir="$(create_sample_dir age_zero)"
+    create_file "$sample_dir/recent.txt" "new"
+    create_file "$sample_dir/old.txt" "old"
+    set_file_age_days "$sample_dir/old.txt" 365
+
+    report="$(run_collector --dir "$sample_dir" --max-age 0)"
+    n_scanned="$(parse_collector_stat "$report" scanned)"
+
+    # NOTE(review): the test name says "includes all", but the assertion accepts
+    # 1 or 2 scanned files, so it does not decide whether old.txt is included.
+    # How --max-age 0 maps to a find expression is not visible from this test;
+    # if it were passed through as "find -mtime -0", GNU find would match no
+    # files at all.  This test only documents the observed lower bound.
+    assert_ge "scanned at least 1" 1 "$n_scanned" || return 1
+}
+
+# ── 27. Max-age CLI override actually takes effect ───────────────────────────
+
+test_max_age_cli_override_applied() {
+    # Regression test: a --max-age given on the command line must be the value
+    # used for the age filter, not the built-in default.
+    restart_stub
+
+    local sample_dir report n_scanned
+    sample_dir="$(create_sample_dir age_override)"
+    create_file "$sample_dir/recent.txt" "new"
+    create_file "$sample_dir/medium.txt" "medium age"
+    set_file_age_days "$sample_dir/medium.txt" 20
+
+    # Default MAX_AGE is 14 days. Pass --max-age 30 on CLI.
+    # If the bug where find_mtime was set before parse_args is present,
+    # the 20-day-old file would be excluded (find -mtime -14).
+    # With the fix, --max-age 30 means find -mtime -30, so it's included.
+    report="$(run_collector --dir "$sample_dir" --max-age 30)"
+    n_scanned="$(parse_collector_stat "$report" scanned)"
+
+    assert_eq "scanned" "2" "$n_scanned" || return 1
+}
+
+# ── 28. Positional directory args ────────────────────────────────────────────
+
+test_positional_directory_args() {
+    # Directories given as bare positional arguments (no --dir) must be scanned:
+    # one file in each of two directories gives two submissions.
+    restart_stub
+
+    local first_dir second_dir report n_submitted
+    first_dir="$(create_sample_dir pos_a)"
+    second_dir="$(create_sample_dir pos_b)"
+    create_file "$first_dir/x.txt"
+    create_file "$second_dir/y.txt"
+
+    # Pass directories as positional args (not --dir)
+    report="$(bash "$COLLECTOR" --server "$(server_host)" --port "$STUB_PORT" \
+        --no-log-file --max-age 30 "$first_dir" "$second_dir" 2>&1)"
+
+    n_submitted="$(parse_collector_stat "$report" submitted)"
+    assert_eq "submitted" "2" "$n_submitted" || return 1
+}
+
+# ══════════════════════════════════════════════════════════════════════════════
+# RUN
+# ══════════════════════════════════════════════════════════════════════════════
+
+# Banner: which collector is under test and which server it talks to.
+printf "\n${BOLD}Thunderstorm Bash Collector — Test Suite${RESET}\n"
+printf "  Collector: %s\n" "$COLLECTOR"
+if [ "$USE_EXTERNAL" -eq 1 ]; then
+    printf "  Server:    %s:%s (external)\n" "$EXTERNAL_SERVER" "$EXTERNAL_PORT"
+    printf "  Note:      stub-verification tests will be skipped\n\n"
+else
+    printf "  Stub:      %s\n\n" "$STUB_BIN"
+fi
+
+setup_tmp
+
+# Validation tests (no server needed)
+for _test_name in \
+    test_help_flag \
+    test_invalid_port_rejected \
+    test_invalid_max_age_rejected \
+    test_invalid_max_size_rejected \
+    test_missing_server_rejected \
+    test_unknown_option_rejected
+do
+    run_test "$_test_name"
+done
+
+# Functional tests (need stub server); run in this fixed order
+for _test_name in \
+    test_basic_async_upload \
+    test_basic_sync_upload \
+    test_dry_run_no_uploads \
+    test_max_file_size_filter \
+    test_max_age_filter \
+    test_multiple_directories \
+    test_nonexistent_directory_warning \
+    test_source_parameter_received \
+    test_file_content_integrity \
+    test_filename_with_spaces \
+    test_filename_special_chars \
+    test_empty_directory \
+    test_nested_directories \
+    test_symlinks_not_followed \
+    test_log_file_written \
+    test_source_url_encoding \
+    test_retries_on_connection_failure \
+    test_full_path_sent_as_filename \
+    test_zero_byte_file \
+    test_max_age_zero_includes_all \
+    test_max_age_cli_override_applied \
+    test_positional_directory_args
+do
+    run_test "$_test_name"
+done
+
+# Summary: print pass/fail totals and exit 1 if any test failed, 0 otherwise.
+printf "\n${BOLD}Results:${RESET} %d/%d passed" "$TESTS_PASSED" "$TESTS_RUN"
+if [ "$TESTS_FAILED" -gt 0 ]; then
+    printf ", ${RED}%d failed${RESET}\n" "$TESTS_FAILED"
+    printf "\n${RED}Failed tests:${RESET}\n"
+    # %b still expands backslash escapes such as \n inside FAILED_NAMES but,
+    # unlike using the variable as the format string, never treats a '%' in
+    # its contents as a conversion specifier.
+    printf '%b' "$FAILED_NAMES"
+    exit 1
+else
+    printf " ${GREEN}✓${RESET}\n\n"
+    exit 0
+fi
diff --git a/scripts/tests/test_perl_large.sh b/scripts/tests/test_perl_large.sh
new file mode 100644
index 0000000..fa24142
--- /dev/null
+++ b/scripts/tests/test_perl_large.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# Quick test for Perl large file detection
+set -e
+
+STUB_LOG="${STUB_LOG:-/tmp/perl-quick.jsonl}"
+STUB_PORT="${STUB_PORT:-18097}"
+STUB_BIN="${STUB_BIN_PATH:-/home/neo/.openclaw/workspace/projects/thunderstorm-stub-server/thunderstorm-stub}"
+STUB_RULES="${STUB_RULES_DIR:-/home/neo/.openclaw/workspace/projects/thunderstorm-stub-server/rules}"
+COLLECTOR_DIR="/home/neo/.openclaw/workspace/projects/thunderstorm-collector-pr/scripts"
+
+# Start stub if not running
+if ! curl -s "http://localhost:$STUB_PORT/api/info" >/dev/null 2>&1; then
+    rm -f "$STUB_LOG"
+    "$STUB_BIN" -port "$STUB_PORT" -rules-dir "$STUB_RULES" -log-file "$STUB_LOG" &
+    sleep 2
+fi
+
+# Create fixture
+FIXTURES=$(mktemp -d)
+mkdir -p "$FIXTURES/large"
+dd if=/dev/zero bs=1024 count=3072 2>/dev/null | tr '\0' 'A' > "$FIXTURES/large/big-perl.tmp"
+echo "THUNDERSTORM_TEST_MATCH_STRING" >> "$FIXTURES/large/big-perl.tmp"
+
+# Run Perl with large file
+echo "Running Perl collector..."
+perl "$COLLECTOR_DIR/thunderstorm-collector.pl" \
+    -s localhost -p "$STUB_PORT" --dir "$FIXTURES/large" --max-age 30 --max-size-kb 4096 2>&1 | tail -3
+
+# Give stub time to write
+sleep 1
+
+# Check log
+echo "Checking log for big-perl.tmp..."
+python3 -c "
+import json
+for line in open('$STUB_LOG'):
+    d = json.loads(line.strip())
+    cf = d.get('subject', {}).get('client_filename', '')
+    if 'big-perl.tmp' in cf:
+        print(f'FOUND: {cf}')
+        print(f'Score: {d.get(\"score\", 0)}')
+        exit(0)
+print('NOT FOUND')
+exit(1)
+"
+
+# Cleanup
+rm -rf "$FIXTURES"
\ No newline at end of file
diff --git a/scripts/tests/verify_uploads.py b/scripts/tests/verify_uploads.py
new file mode 100644
index 0000000..b4f1705
--- /dev/null
+++ b/scripts/tests/verify_uploads.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+
+import argparse
+import hashlib
+import pathlib
+import sys
+import time
+
+
+def sha256_of_file(path: pathlib.Path) -> str:
+    digest = hashlib.sha256()
+    with path.open("rb") as f:
+        while True:
+            chunk = f.read(1024 * 1024)
+            if not chunk:
+                break
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
+def collect_files(root: pathlib.Path):
+    return sorted([p for p in root.rglob("*") if p.is_file()])
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description="Verify uploaded sample integrity in stub uploads dir.")
+    parser.add_argument("--uploads-dir", required=True, help="Directory used as thunderstorm-stub-server --uploads-dir")
+    parser.add_argument("--expected-sha256", required=True, help="Expected sha256 hash of each uploaded sample")
+    parser.add_argument("--min-count", type=int, required=True, help="Minimum number of uploaded files expected")
+    parser.add_argument("--timeout-seconds", type=int, default=60, help="Max time to wait for async uploads")
+    args = parser.parse_args()
+
+    uploads_dir = pathlib.Path(args.uploads_dir)
+    expected_sha256 = args.expected_sha256.lower()
+    deadline = time.time() + args.timeout_seconds
+
+    while time.time() < deadline:
+        files = collect_files(uploads_dir)
+        if len(files) >= args.min_count:
+            bad = []
+            for file_path in files:
+                actual_sha256 = sha256_of_file(file_path).lower()
+                if actual_sha256 != expected_sha256:
+                    bad.append((file_path, actual_sha256))
+
+            if bad:
+                print("Found uploaded files with unexpected hash:", file=sys.stderr)
+                for path, actual in bad:
+                    print(f"  {path}: {actual} (expected {expected_sha256})", file=sys.stderr)
+                return 1
+
+            print(f"Integrity verified for {len(files)} uploaded files.")
+            return 0
+
+        time.sleep(1)
+
+    files = collect_files(uploads_dir)
+    print(
+        f"Timed out waiting for uploads. Expected at least {args.min_count}, found {len(files)}.",
+        file=sys.stderr,
+    )
+    for file_path in files:
+        print(f"  Found: {file_path}", file=sys.stderr)
+    return 1
+
+
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/scripts/thunderstorm-collector-ash.sh b/scripts/thunderstorm-collector-ash.sh
new file mode 100755
index 0000000..f1ebc32
--- /dev/null
+++ b/scripts/thunderstorm-collector-ash.sh
@@ -0,0 +1,1144 @@
+#!/bin/sh
+#
+# THOR Thunderstorm Collector — POSIX sh / ash Edition
+# Florian Roth / Nextron Systems
+#
+# Goals:
+# - POSIX sh compatible (ash, dash, busybox sh, ksh88)
+# - No bash required — suitable for embedded Linux, routers, stripped VMs
+# - Functionally equivalent to thunderstorm-collector.sh
+#
+# Limitations vs the bash version:
+# - Filenames containing literal newlines will not be processed correctly
+#   (find -print0 / read -d '' require bash; this is an extreme edge case
+#   in real deployments and is documented here as a known trade-off)
+# - No associative arrays, no C-style for loops — all replaced with
+#   POSIX-compatible equivalents
+
+# Version string shown in the banner.
+VERSION="0.5.0"
+
+# Defaults --------------------------------------------------------------------
+
+# Logging targets; each LOG_TO_* flag is 1 (enabled) or 0 (disabled) and is
+# read by log_msg.
+LOGFILE="./thunderstorm.log"
+LOG_TO_FILE=1
+LOG_TO_SYSLOG=0
+LOG_TO_CMDLINE=1
+SYSLOG_FACILITY="user"
+
+# Server connection settings.
+THUNDERSTORM_SERVER="ygdrasil.nextron"
+THUNDERSTORM_PORT=8080
+USE_SSL=0
+INSECURE=0
+CA_CERT=""
+ASYNC_MODE=1
+
+# File selection limits and run behaviour.
+MAX_AGE=14
+MAX_FILE_SIZE_KB=2000
+DEBUG=0
+DRY_RUN=0
+RETRIES=3
+
+# UPLOAD_TOOL is set by detect_upload_tool ("curl", "wget" or "nc").
+# TMP_FILES is a space-separated list of temp files removed by cleanup_tmp_files.
+UPLOAD_TOOL=""
+TMP_FILES=""
+
+# Space-separated list of directories to scan (no bash arrays in ash)
+SCAN_DIRS="/root /tmp /home /var /usr"
+SCAN_DIRS_SET=0   # 1 once the user has overridden via --dir
+
+# Run counters; reported in the "interrupted" marker sent by on_signal.
+FILES_SCANNED=0
+FILES_SUBMITTED=0
+FILES_SKIPPED=0
+FILES_FAILED=0
+PROGRESS=1
+PROGRESS_SET=0
+
+# Script basename (used as the syslog message prefix) and start time in epoch
+# seconds (0 if `date +%s` fails).
+SCRIPT_NAME="${0##*/}"
+START_TS="$(date +%s 2>/dev/null || echo 0)"
+SOURCE_NAME=""
+# While 1, log_msg blanks the current terminal line before printing.
+PROGRESS_ACTIVE=0
+
+# Filesystem exclusions (POSIX-compatible) ------------------------------------
+# Space-separated list of paths to prune during find.
+EXCLUDE_PATHS="/proc /sys /dev /run /snap /.snapshots"
+
+# Network and special filesystem types
+NETWORK_FS_TYPES="nfs nfs4 cifs smbfs smb3 sshfs fuse.sshfs afp webdav davfs2 fuse.rclone fuse.s3fs"
+SPECIAL_FS_TYPES="proc procfs sysfs devtmpfs devpts cgroup cgroup2 pstore bpf tracefs debugfs securityfs hugetlbfs mqueue autofs fusectl rpc_pipefs nsfs configfs binfmt_misc selinuxfs efivarfs ramfs"
+
+# Cloud storage folder names (lowercase for comparison)
+CLOUD_DIR_NAMES="onedrive dropbox .dropbox googledrive nextcloud owncloud mega megasync tresorit syncthing iclouddrive"
+
+# Cloud directory names that contain spaces — checked separately since the
+# space-separated CLOUD_DIR_NAMES list cannot hold them.
+# Entries are separated by '|' and matched as prefixes of a path component.
+CLOUD_DIR_NAMES_SPACED="google drive|icloud drive|onedrive -"
+
+# get_excluded_mounts: parse /proc/mounts and print, one per line, the mount
+# point of every filesystem whose type is listed in NETWORK_FS_TYPES or
+# SPECIAL_FS_TYPES.  Prints nothing and returns 0 if /proc/mounts is unreadable.
+get_excluded_mounts() {
+    [ -r /proc/mounts ] || return 0
+    while IFS=' ' read -r _gem_dev _gem_mp _gem_fs _gem_rest; do
+        case " $NETWORK_FS_TYPES $SPECIAL_FS_TYPES " in
+            *" $_gem_fs "*)
+                # /proc/mounts writes space, tab, newline and backslash inside a
+                # mount point as the octal escapes \040, \011, \012 and \134.
+                # Decode them with %b so the printed path matches the real one
+                # (e.g. "/mnt/My\040Share" becomes "/mnt/My Share").
+                printf '%b\n' "$_gem_mp"
+                ;;
+        esac
+    done < /proc/mounts
+}
+
+# is_cloud_path: check if a path contains a known cloud storage folder name
+is_cloud_path() {
+    _icp_lower="$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]')"
+    for _icp_name in $CLOUD_DIR_NAMES; do
+        case "$_icp_lower" in
+            *"/$_icp_name"/*|*"/$_icp_name") return 0 ;;
+        esac
+    done
+    # Check cloud directory names that contain spaces (pipe-separated)
+    _icp_old_ifs="$IFS"
+    IFS='|'
+    for _icp_name in $CLOUD_DIR_NAMES_SPACED; do
+        case "$_icp_lower" in
+            *"/$_icp_name"*) IFS="$_icp_old_ifs"; return 0 ;;
+        esac
+    done
+    IFS="$_icp_old_ifs"
+    case "$_icp_lower" in
+        */library/cloudstorage/*|*/library/cloudstorage) return 0 ;;
+    esac
+    return 1
+}
+
+# Helpers ---------------------------------------------------------------------
+
+timestamp() {
+    date "+%Y-%m-%d_%H:%M:%S" 2>/dev/null || date
+}
+
+cleanup_tmp_files() {
+    # Remove every regular file listed in the space-separated TMP_FILES.
+    for _f in $TMP_FILES; do
+        [ -n "$_f" ] && [ -f "$_f" ] && rm -f "$_f"
+    done
+    # Also drop the private fallback directory that mktemp_portable creates
+    # when mktemp is unavailable.  rmdir only succeeds on an empty directory;
+    # a missing or non-empty one is left alone and the error is ignored.
+    rmdir "${TMPDIR:-/tmp}/thunderstorm.$$" 2>/dev/null || true
+}
+
+INTERRUPTED=0
+
+# on_exit: handler for the EXIT trap; removes the temp files listed in TMP_FILES.
+on_exit() {
+    cleanup_tmp_files
+}
+
+on_signal() {
+    INTERRUPTED=1
+    # Close file descriptors that may be open from the main loop
+    exec 3<&- 2>/dev/null
+    exec 4<&- 2>/dev/null
+    PROGRESS_ACTIVE=0
+    log_msg warn "Signal received — sending interrupted collection marker"
+    if [ "$DRY_RUN" -eq 0 ] && [ -n "$_GLOBAL_BASE_URL" ]; then
+        _sig_elapsed=0
+        if [ "$START_TS" -gt 0 ] 2>/dev/null; then
+            _sig_elapsed=$(( $(date +%s 2>/dev/null || echo "$START_TS") - START_TS ))
+            [ "$_sig_elapsed" -lt 0 ] && _sig_elapsed=0
+        fi
+        _sig_stats="\"stats\":{\"scanned\":${FILES_SCANNED},\"submitted\":${FILES_SUBMITTED},\"skipped\":${FILES_SKIPPED},\"failed\":${FILES_FAILED},\"elapsed_seconds\":${_sig_elapsed}}"
+        collection_marker "$_GLOBAL_BASE_URL" "interrupted" "$_GLOBAL_SCAN_ID" "$_sig_stats" >/dev/null
+    fi
+    cleanup_tmp_files
+    exit 1
+}
+
+trap on_exit EXIT
+trap on_signal INT TERM
+
+log_msg() {
+    # log_msg LEVEL TEXT...
+    # Writes one message to each enabled target: the log file, syslog and
+    # stderr.  CR/LF inside the message are replaced by spaces.
+    _lm_level="$1"
+    shift
+    _lm_message="$*"
+
+    # Debug messages are dropped unless DEBUG is 1.
+    if [ "$_lm_level" = "debug" ] && [ "$DEBUG" -ne 1 ]; then
+        return 0
+    fi
+
+    _lm_ts="$(timestamp)"
+    # Strip CR/LF from message — no ${var//pat/rep} in ash, use tr
+    _lm_clean="$(printf '%s' "$_lm_message" | tr '\r\n' '  ')"
+
+    # File target: on the first write failure, disable file logging and warn.
+    if [ "$LOG_TO_FILE" -eq 1 ]; then
+        if ! printf "%s %s %s\n" "$_lm_ts" "$_lm_level" "$_lm_clean" >> "$LOGFILE" 2>/dev/null; then
+            LOG_TO_FILE=0
+            printf "%s warn Could not write to log file '%s'; disabling file logging\n" \
+                "$_lm_ts" "$LOGFILE" >&2
+        fi
+    fi
+
+    # Syslog target: map the level to a logger priority; failures are ignored.
+    if [ "$LOG_TO_SYSLOG" -eq 1 ] && command -v logger >/dev/null 2>&1; then
+        _lm_prio="info"
+        case "$_lm_level" in
+            error) _lm_prio="err" ;;
+            warn)  _lm_prio="warning" ;;
+            debug) _lm_prio="debug" ;;
+        esac
+        logger -p "${SYSLOG_FACILITY}.${_lm_prio}" "${SCRIPT_NAME}: ${_lm_clean}" \
+            >/dev/null 2>&1 || true
+    fi
+
+    # Console target: every level goes to stderr; blank the progress line first.
+    if [ "$LOG_TO_CMDLINE" -eq 1 ]; then
+        if [ "$PROGRESS_ACTIVE" -eq 1 ]; then
+            printf '\r%80s\r' '' >&2
+        fi
+        printf "[%s] %s\n" "$_lm_level" "$_lm_clean" >&2
+    fi
+}
+
+die() {
+    # Log the arguments as a single error message, then exit with status 2.
+    log_msg error "$@"
+    exit 2
+}
+
+print_banner() {
+    # Print the ASCII-art banner with the script version to stdout.
+    # The here-document is unquoted so ${VERSION} expands; for the same reason
+    # the trailing "\\" in the art prints as a single backslash.
+    cat <<EOF
+==============================================================
+    ________                __            __
+   /_  __/ /  __ _____  ___/ /__ _______ / /____  ______ _
+    / / / _ \/ // / _ \/ _  / -_) __(_-</ __/ _ \/ __/  ' \\
+   /_/ /_//_/\_,_/_//_/\_,_/\__/_/ /___/\__/\___/_/ /_/_/_/
+   v${VERSION} (POSIX sh / ash edition)
+
+   THOR Thunderstorm Collector for Linux/Unix
+==============================================================
+EOF
+}
+
+print_help() {
+    # Print usage, options, notes and examples to stdout.
+    # The here-document delimiter is quoted, so the text is emitted verbatim
+    # with no variable or command expansion.
+    cat <<'EOF'
+Usage:
+  sh thunderstorm-collector-ash.sh [options]
+
+Options:
+  -s, --server <host>        Thunderstorm server hostname or IP
+  -p, --port <port>          Thunderstorm port (default: 8080)
+  -d, --dir <path>           Directory to scan (repeatable)
+  --max-age <days>           Max file age in days (default: 14)
+  --max-size-kb <kb>         Max file size in KB (default: 2000)
+  --source <name>            Source identifier (default: hostname)
+  --ssl                      Use HTTPS
+  -k, --insecure             Skip TLS certificate verification
+  --ca-cert <path>           Path to custom CA certificate bundle for TLS
+  --sync                     Use /api/check (default: /api/checkAsync)
+  --retries <num>            Retry attempts per file (default: 3)
+  --dry-run                  Do not upload, only show what would be submitted
+  --debug                    Enable debug log messages
+  --log-file <path>          Log file path (default: ./thunderstorm.log)
+  --no-log-file              Disable file logging
+  --syslog                   Enable syslog logging
+  --progress                 Force progress reporting on
+  --no-progress              Force progress reporting off
+  --quiet                    Disable command-line logging
+  -h, --help                 Show this help text
+
+Notes:
+  This script requires only POSIX sh (ash, dash, busybox sh).
+  Filenames containing literal newline characters are not supported.
+  For systems with bash available, prefer thunderstorm-collector.sh.
+
+Examples:
+  sh thunderstorm-collector-ash.sh --server thunderstorm.local
+  sh thunderstorm-collector-ash.sh --server 10.0.0.5 --dir /tmp --dir /home
+EOF
+}
+
+is_integer() {
+    # Succeeds only for a non-empty string made up solely of the digits 0-9
+    # (no sign, no whitespace).
+    [ -n "$1" ] || return 1
+    case "$1" in
+        *[!0-9]*) return 1 ;;
+    esac
+    return 0
+}
+
+detect_source_name() {
+    # Fill SOURCE_NAME if it is still empty.  Tried in order: `hostname -f`,
+    # `hostname`, `uname -n`, and finally the literal "unknown-host".
+    # A value that is already set is left untouched.  Always returns 0.
+    [ -n "$SOURCE_NAME" ] && return 0
+    if command -v hostname >/dev/null 2>&1; then
+        SOURCE_NAME="$(hostname -f 2>/dev/null)"
+        [ -z "$SOURCE_NAME" ] && SOURCE_NAME="$(hostname 2>/dev/null)"
+    fi
+    [ -z "$SOURCE_NAME" ] && SOURCE_NAME="$(uname -n 2>/dev/null)"
+    [ -z "$SOURCE_NAME" ] && SOURCE_NAME="unknown-host"
+    # Without this, the function's status would be that of the failed
+    # `[ -z ... ]` test above, i.e. 1 whenever a name was found.
+    return 0
+}
+
+urlencode() {
+    # urlencode STRING
+    # Print STRING percent-encoded byte by byte.  RFC 3986 unreserved
+    # characters (A-Z a-z 0-9 - _ . ~) pass through; every other byte becomes
+    # %XX with upper-case hex digits.
+    #
+    # POSIX-safe urlencode: no bash C-style for loop or ${var:i:1}
+    # Process od hex output word by word via set --
+    # -v is required: without it od collapses repeated identical output lines
+    # into a single "*", which would both drop input bytes and be glob-expanded
+    # by the unquoted `set --` below.
+    _ue_hex="$(printf '%s' "$1" | od -v -An -tx1 | tr -d '\n')"
+    # shellcheck disable=SC2086
+    set -- $_ue_hex
+    _ue_result=""
+    for _ue_byte; do
+        [ -z "$_ue_byte" ] && continue
+        # Validate hex token: must be exactly 2 hex digits
+        case "$_ue_byte" in
+            [0-9a-fA-F][0-9a-fA-F]) ;;
+            *) continue ;;
+        esac
+        _ue_dec=$(printf '%d' "0x${_ue_byte}" 2>/dev/null) || continue
+        # Pass through RFC 3986 unreserved characters: A-Z a-z 0-9 - _ . ~
+        if   { [ "$_ue_dec" -ge 65 ] && [ "$_ue_dec" -le  90 ]; } \
+          || { [ "$_ue_dec" -ge 97 ] && [ "$_ue_dec" -le 122 ]; } \
+          || { [ "$_ue_dec" -ge 48 ] && [ "$_ue_dec" -le  57 ]; } \
+          ||   [ "$_ue_dec" -eq 45 ] \
+          ||   [ "$_ue_dec" -eq 95 ] \
+          ||   [ "$_ue_dec" -eq 46 ] \
+          ||   [ "$_ue_dec" -eq 126 ]; then
+            # Rebuild the literal character from its octal escape.
+            _ue_result="${_ue_result}$(printf "\\$(printf '%03o' "$_ue_dec")")"
+        else
+            _ue_result="${_ue_result}%$(printf '%02X' "$_ue_dec")"
+        fi
+    done
+    printf '%s' "$_ue_result"
+}
+
+build_query_source() {
+    # Print "?source=<url-encoded $1>".  For an empty argument nothing is
+    # printed and the status is 1.
+    [ -z "$1" ] && return 1
+    printf "?source=%s" "$(urlencode "$1")"
+}
+
+sanitize_filename_for_multipart() {
+    # Print $1 with every double quote, backslash, semicolon, CR and LF
+    # replaced by "_", so it is safe inside a multipart filename="..." value.
+    #
+    # No ${var//pat/rep} in ash.  A single tr does the whole mapping; piping
+    # through sed first is avoided because some sed implementations append a
+    # newline to input that lacks one, which tr would then turn into a
+    # trailing "_".
+    printf '%s' "$1" | tr '"\\;\r\n' '_____'
+}
+
+file_size_kb() {
+    # Print the size of file $1 in KB, rounded up to the next whole KB.
+    # If the size cannot be determined, print -1 and return 1.
+    #
+    # The 2>/dev/null sits on the brace group so that it is already in effect
+    # when the shell tries to open "$1"; placed after the "<" redirection it
+    # would not hide the shell's own "cannot open" message.
+    _sz_bytes="$( { wc -c < "$1"; } 2>/dev/null | tr -d ' \t')"
+    case "$_sz_bytes" in
+        ''|*[!0-9]*) echo -1; return 1 ;;
+    esac
+    echo $(( (_sz_bytes + 1023) / 1024 ))
+}
+
+mktemp_portable() {
+    # Create an empty temp file and print its path.  Returns 1 if no file
+    # could be created.  Prefers mktemp; otherwise falls back to a private
+    # per-process directory.
+    _mp_t="$(mktemp "${TMPDIR:-/tmp}/thunderstorm.XXXXXX" 2>/dev/null)"
+    if [ -n "$_mp_t" ]; then
+        echo "$_mp_t"
+        return 0
+    fi
+    # mktemp unavailable — create a private temp directory with restrictive
+    # permissions, then place files inside it to avoid symlink races.
+    # NOTE(review): a directory of this name that already exists is reused
+    # without checking who owns it — confirm this is acceptable.
+    _mp_dir="${TMPDIR:-/tmp}/thunderstorm.$$"
+    if [ ! -d "$_mp_dir" ]; then
+        ( umask 077 && mkdir "$_mp_dir" ) 2>/dev/null || return 1
+    fi
+    # Try names "<seq>.<epoch>" until one can be created; give up after seq 100.
+    _mp_seq=0
+    while :; do
+        _mp_t="${_mp_dir}/${_mp_seq}.$(date +%s 2>/dev/null || echo 0)"
+        # set -C (noclobber) makes the redirection fail if the file exists, so
+        # creation is exclusive.
+        if ( set -C; : > "$_mp_t" ) 2>/dev/null; then
+            echo "$_mp_t"
+            return 0
+        fi
+        _mp_seq=$((_mp_seq + 1))
+        [ "$_mp_seq" -gt 100 ] && return 1
+    done
+}
+
+_wget_is_busybox() {
+    # Succeeds when the installed wget identifies itself as BusyBox.
+    # BusyBox wget truncates --post-file at the first NUL byte, making it
+    # unable to upload binary files.  Detect it so we can fall back to nc.
+    # Note: BusyBox wget does not support --version; use --help instead.
+    # Only the first five lines of the help output are searched
+    # (case-insensitively) for "busybox".
+    wget --help 2>&1 | head -5 | grep -qi busybox
+}
+
+detect_upload_tool() {
+    # Choose the upload program and store its name in UPLOAD_TOOL.
+    # Preference order: curl, non-BusyBox wget, nc, BusyBox wget (with
+    # warnings).  Returns 1 if none of them is installed.
+    if command -v curl >/dev/null 2>&1; then
+        UPLOAD_TOOL="curl"
+        return 0
+    fi
+
+    _dut_have_wget=0
+    command -v wget >/dev/null 2>&1 && _dut_have_wget=1
+
+    # Prefer nc over BusyBox wget for binary-safe uploads
+    if [ "$_dut_have_wget" -eq 1 ] && ! _wget_is_busybox; then
+        UPLOAD_TOOL="wget"
+        return 0
+    fi
+    if command -v nc >/dev/null 2>&1; then
+        UPLOAD_TOOL="nc"
+        return 0
+    fi
+
+    # Nothing usable at all.
+    [ "$_dut_have_wget" -eq 1 ] || return 1
+
+    # Fall back to BusyBox wget (works for text files, truncates binary at NUL)
+    UPLOAD_TOOL="wget"
+    log_msg warn "WARNING: BusyBox wget detected — --post-file truncates at the first NUL byte."
+    log_msg warn "Binary files (EXE, DLL, ZIP, etc.) will be silently corrupted during upload."
+    log_msg warn "Install curl or full GNU wget for reliable binary uploads."
+    return 0
+}
+
+upload_with_curl() {
+    # Upload one file as multipart/form-data with curl.
+    #   $1  endpoint URL
+    #   $2  path of the file to send
+    #   $3  filename to announce in the multipart part (sanitized first)
+    # Returns:
+    #   0    2xx response whose body does not contain "reason"
+    #   91   a temp file could not be created
+    #   92   any other non-2xx status, or a body containing "reason"
+    #   103  HTTP 503; RETRY_AFTER holds the Retry-After seconds (capped at
+    #        120) or "" if the header is missing or not a positive integer
+    #   else curl's own non-zero exit code (transport failure)
+    _uc_endpoint="$1"
+    _uc_filepath="$2"
+    _uc_filename="$3"
+    _uc_safe_name="$(sanitize_filename_for_multipart "$_uc_filename")"
+    # curl's -F parser treats ';' and ',' in an unquoted path or filename as
+    # separators.  Both values are therefore wrapped in double quotes below,
+    # which requires backslash-escaping any '"' or '\' in the path.
+    _uc_quoted_path="$(printf '%s' "$_uc_filepath" | sed 's/[\\"]/\\&/g')"
+    _uc_resp="$(mktemp_portable)" || return 91
+    # Register files as soon as their names are known so that no early return
+    # can leave one behind.
+    TMP_FILES="${TMP_FILES} ${_uc_resp} ${_uc_resp}.err"
+    _uc_hdr="$(mktemp_portable)" || return 91
+    TMP_FILES="${TMP_FILES} ${_uc_hdr}"
+
+    # Build TLS arguments safely to avoid word-splitting on paths with spaces
+    set -- -sS -X POST -o "$_uc_resp" -D "$_uc_hdr" -w '%{http_code}'
+    [ "$INSECURE" -eq 1 ] && set -- "$@" -k
+    [ -n "$CA_CERT" ] && set -- "$@" --cacert "$CA_CERT"
+    set -- "$@" "$_uc_endpoint" \
+        -F "file=@\"${_uc_quoted_path}\";filename=\"${_uc_safe_name}\""
+
+    # Use -w to capture HTTP status code; do NOT use --fail so we can inspect 503
+    _uc_http_code="$(curl "$@" 2>"${_uc_resp}.err")"
+    _uc_code=$?
+
+    if [ "$_uc_code" -ne 0 ]; then
+        _uc_err="$(cat "${_uc_resp}.err" 2>/dev/null | tr '\r\n' '  ')"
+        log_msg debug "curl error (code $_uc_code) for '$_uc_filepath': $_uc_err"
+        return "$_uc_code"
+    fi
+
+    # Handle 503 back-pressure: return special code 103 and set RETRY_AFTER
+    if [ "$_uc_http_code" = "503" ]; then
+        RETRY_AFTER=""
+        _uc_ra="$(grep -i '^Retry-After:' "$_uc_hdr" 2>/dev/null | head -1 | sed 's/^[^:]*:[[:space:]]*//' | tr -d '\r')"
+        if is_integer "$_uc_ra" 2>/dev/null && [ "$_uc_ra" -gt 0 ] 2>/dev/null; then
+            # Cap at 120 seconds
+            [ "$_uc_ra" -gt 120 ] && _uc_ra=120
+            RETRY_AFTER="$_uc_ra"
+        fi
+        log_msg warn "Server returned 503 for '$_uc_filepath'"
+        return 103
+    fi
+
+    # Any other non-2xx status
+    case "$_uc_http_code" in
+        2*) ;;
+        *)
+            _uc_body="$(cat "$_uc_resp" 2>/dev/null | tr '\r\n' '  ')"
+            log_msg error "Server returned HTTP $_uc_http_code for '$_uc_filepath': $_uc_body"
+            return 92
+            ;;
+    esac
+
+    # A 2xx body that mentions "reason" is treated as a rejection.
+    if grep -qi "reason" "$_uc_resp" 2>/dev/null; then
+        _uc_body="$(cat "$_uc_resp" 2>/dev/null | tr '\r\n' '  ')"
+        log_msg error "Server reported rejection for '$_uc_filepath': $_uc_body"
+        return 92
+    fi
+    return 0
+}
+
+# generate_safe_boundary: produce a multipart boundary that does not appear in
+# the given file.  Regenerates up to 10 times if a collision is detected.
+generate_safe_boundary() {
+    _gsb_filepath="$1"
+    _gsb_attempt=0
+    while [ "$_gsb_attempt" -lt 10 ]; do
+        _gsb_rand="$(od -An -N16 -tx1 /dev/urandom 2>/dev/null | tr -d ' \n')"
+        _gsb_boundary="----ThunderstormBoundary${$}${_gsb_rand:-$(date +%s 2>/dev/null || echo 0)${_gsb_attempt}}"
+        if ! LC_ALL=C grep -qF "$_gsb_boundary" "$_gsb_filepath" 2>/dev/null; then
+            printf '%s' "$_gsb_boundary"
+            return 0
+        fi
+        _gsb_attempt=$((_gsb_attempt + 1))
+    done
+    # Exhausted attempts — return last candidate (collision is astronomically unlikely)
+    printf '%s' "$_gsb_boundary"
+}
+
+upload_with_wget() {
+    _uw_endpoint="$1"
+    _uw_filepath="$2"
+    _uw_filename="$3"
+    _uw_safe_name="$(sanitize_filename_for_multipart "$_uw_filename")"
+    _uw_boundary="$(generate_safe_boundary "$_uw_filepath")"
+    _uw_body="$(mktemp_portable)" || return 93
+    _uw_resp="$(mktemp_portable)" || return 94
+    _uw_hdr="$(mktemp_portable)" || return 94
+    TMP_FILES="${TMP_FILES} ${_uw_body} ${_uw_resp} ${_uw_hdr}"
+
+    {
+        printf -- "--%s\r\n" "$_uw_boundary"
+        printf 'Content-Disposition: form-data; name="file"; filename="%s"\r\n' \
+            "$_uw_safe_name"
+        printf 'Content-Type: application/octet-stream\r\n\r\n'
+        cat "$_uw_filepath"
+        printf '\r\n--%s--\r\n' "$_uw_boundary"
+    } > "$_uw_body" 2>/dev/null || return 95
+
+    # Use --server-response to capture HTTP status; stderr has the headers
+    # Build TLS arguments safely to avoid word-splitting on paths with spaces
+    set -- -O "$_uw_resp" -S
+    [ "$INSECURE" -eq 1 ] && set -- "$@" --no-check-certificate
+    [ -n "$CA_CERT" ] && set -- "$@" "--ca-certificate=$CA_CERT"
+    set -- "$@" --header="Content-Type: multipart/form-data; boundary=${_uw_boundary}" \
+        --post-file="$_uw_body" \
+        "$_uw_endpoint"
+
+    wget "$@" 2>"$_uw_hdr"
+    _uw_code=$?
+
+    # Parse HTTP status code from wget's server response output
+    # wget -S prints "  HTTP/1.1 200 OK" lines to stderr
+    # Use sed instead of grep -oE for POSIX/BusyBox compatibility
+    _uw_http_code="$(sed -n 's/.*HTTP\/[0-9.]*[[:space:]]*\([0-9][0-9][0-9]\).*/\1/p' "$_uw_hdr" 2>/dev/null | tail -1)"
+
+    # If wget failed and we couldn't parse a status, return the wget error
+    if [ "$_uw_code" -ne 0 ] && [ -z "$_uw_http_code" ]; then
+        return "$_uw_code"
+    fi
+
+    # Handle 503 back-pressure
+    if [ "$_uw_http_code" = "503" ]; then
+        RETRY_AFTER=""
+        _uw_ra="$(grep -i 'Retry-After:' "$_uw_hdr" 2>/dev/null | head -1 | sed 's/^[^:]*:[[:space:]]*//' | tr -d '\r')"
+        if is_integer "$_uw_ra" 2>/dev/null && [ "$_uw_ra" -gt 0 ] 2>/dev/null; then
+            [ "$_uw_ra" -gt 120 ] && _uw_ra=120
+            RETRY_AFTER="$_uw_ra"
+        fi
+        log_msg warn "Server returned 503 for '$_uw_filepath'"
+        return 103
+    fi
+
+    # Accept 2xx as success
+    if [ -n "$_uw_http_code" ]; then
+        case "$_uw_http_code" in
+            2[0-9][0-9]) ;;
+            *)
+                _uw_body_content="$(cat "$_uw_resp" 2>/dev/null | tr '\r\n' '  ')"
+                log_msg error "Server returned HTTP $_uw_http_code for '$_uw_filepath': $_uw_body_content"
+                return 92
+                ;;
+        esac
+    fi
+
+    # wget returned success but check for rejection in body
+    if grep -qi "reason" "$_uw_resp" 2>/dev/null; then
+        _uw_body_content="$(cat "$_uw_resp" 2>/dev/null | tr '\r\n' '  ')"
+        log_msg error "Server reported rejection for '$_uw_filepath': $_uw_body_content"
+        return 96
+    fi
+    return 0
+}
+
+upload_with_nc() {
+    # Raw HTTP POST via netcat — binary-safe, no NUL truncation.
+    # Used as a fallback when only BusyBox wget + nc are available.
+    # WARNING: nc does not support TLS — only works with plain HTTP.
+    _nc_endpoint="$1"    # full URL: http://host:port/path?query
+    case "$_nc_endpoint" in
+        https://*) log_msg error "nc (netcat) does not support HTTPS; use curl or wget"; return 99 ;;
+    esac
+    _nc_filepath="$2"
+    _nc_filename="$3"
+    _nc_safe_name="$(sanitize_filename_for_multipart "$_nc_filename")"
+    _nc_boundary="$(generate_safe_boundary "$_nc_filepath")"
+    _nc_body="$(mktemp_portable)" || return 97
+    _nc_resp_file="$(mktemp_portable)" || return 97
+    TMP_FILES="${TMP_FILES} ${_nc_body} ${_nc_resp_file}"
+
+    # Build multipart body
+    {
+        printf -- "--%s\r\n" "$_nc_boundary"
+        printf 'Content-Disposition: form-data; name="file"; filename="%s"\r\n' \
+            "$_nc_safe_name"
+        printf 'Content-Type: application/octet-stream\r\n\r\n'
+        cat "$_nc_filepath"
+        printf '\r\n--%s--\r\n' "$_nc_boundary"
+    } > "$_nc_body" 2>/dev/null || return 98
+
+    _nc_content_length="$(wc -c < "$_nc_body" | tr -d ' \t')"
+
+    # Parse host and port from the endpoint URL
+    # Strip scheme
+    _nc_hostpath="${_nc_endpoint#*://}"
+    # Extract host:port
+    _nc_hostport="${_nc_hostpath%%/*}"
+    _nc_host="${_nc_hostport%%:*}"
+    _nc_port="${_nc_hostport##*:}"
+    [ "$_nc_port" = "$_nc_host" ] && _nc_port=80
+    # Extract path+query
+    _nc_path="/${_nc_hostpath#*/}"
+
+    # Send raw HTTP via nc (cat merges headers + binary body into one stream)
+    {
+        printf "POST %s HTTP/1.0\r\n" "$_nc_path"
+        printf "Host: %s\r\n" "$_nc_hostport"
+        printf "Content-Type: multipart/form-data; boundary=%s\r\n" "$_nc_boundary"
+        printf "Content-Length: %s\r\n" "$_nc_content_length"
+        printf "Connection: close\r\n"
+        printf "\r\n"
+        cat "$_nc_body"
+    } | nc "$_nc_host" "$_nc_port" -w 30 > "$_nc_resp_file" 2>/dev/null
+
+    # No response or connection failure
+    if [ ! -s "$_nc_resp_file" ]; then
+        log_msg error "No response from server for '$_nc_filepath'"
+        return 1
+    fi
+
+    # Parse HTTP status code from the first line (e.g. "HTTP/1.1 200 OK")
+    _nc_status_line="$(head -1 "$_nc_resp_file" | tr -d '\r')"
+    _nc_http_code="$(printf '%s' "$_nc_status_line" | sed -n 's/^HTTP\/[^ ]* \([0-9][0-9]*\).*/\1/p')"
+
+    if [ -z "$_nc_http_code" ]; then
+        log_msg error "Could not parse HTTP status for '$_nc_filepath': $_nc_status_line"
+        return 99
+    fi
+
+    # Handle 503 back-pressure
+    if [ "$_nc_http_code" = "503" ]; then
+        RETRY_AFTER=""
+        _nc_ra="$(grep -i '^Retry-After:' "$_nc_resp_file" 2>/dev/null | head -1 | sed 's/^[^:]*:[[:space:]]*//' | tr -d '\r')"
+        if is_integer "$_nc_ra" 2>/dev/null && [ "$_nc_ra" -gt 0 ] 2>/dev/null; then
+            [ "$_nc_ra" -gt 120 ] && _nc_ra=120
+            RETRY_AFTER="$_nc_ra"
+        fi
+        log_msg warn "Server returned 503 for '$_nc_filepath'"
+        return 103
+    fi
+
+    # Accept 2xx as success
+    case "$_nc_http_code" in
+        2[0-9][0-9])
+            # Check for rejection in response body (consistent with curl/wget paths)
+            if grep -qi "reason" "$_nc_resp_file" 2>/dev/null; then
+                _nc_body_content="$(sed '1,/^\r*$/d' "$_nc_resp_file" 2>/dev/null | tr '\r\n' '  ')"
+                log_msg error "Server reported rejection for '$_nc_filepath': $_nc_body_content"
+                return 99
+            fi
+            return 0
+            ;;
+    esac
+
+    # All other statuses are errors
+    log_msg error "Server returned HTTP $_nc_http_code for '$_nc_filepath': $_nc_status_line"
+    return 99
+}
+
+# json_escape: escape a string for safe inclusion in JSON values
+# Handles backslash, double-quote, and all control characters (0x00-0x1F)
+# Uses od + byte-by-byte rebuild for full POSIX portability
+# Args:   $1 = raw string
+# Output: the escaped string on stdout (without surrounding quotes and
+#         without a trailing newline)
+json_escape() {
+    # -v is required: without it od collapses runs of identical 16-byte
+    # output lines into a single "*", which would silently truncate inputs
+    # containing long repeated sequences (and let "*" glob-expand below).
+    _je_hex="$(printf '%s' "$1" | od -An -v -tx1 | tr -d '\n')"
+    _je_result=""
+    # Deliberately unquoted: split the hex dump into one positional
+    # parameter per byte.
+    # shellcheck disable=SC2086
+    set -- $_je_hex
+    for _je_byte; do
+        [ -z "$_je_byte" ] && continue
+        # Hex -> decimal; skip anything that is not a valid hex byte
+        _je_dec=$(printf '%d' "0x${_je_byte}" 2>/dev/null) || continue
+        if [ "$_je_dec" -eq 92 ]; then
+            # backslash
+            _je_result="${_je_result}\\\\"
+        elif [ "$_je_dec" -eq 34 ]; then
+            # double quote
+            _je_result="${_je_result}\\\""
+        elif [ "$_je_dec" -eq 8 ]; then
+            # backspace
+            _je_result="${_je_result}\\b"
+        elif [ "$_je_dec" -eq 9 ]; then
+            # horizontal tab
+            _je_result="${_je_result}\\t"
+        elif [ "$_je_dec" -eq 10 ]; then
+            # line feed
+            _je_result="${_je_result}\\n"
+        elif [ "$_je_dec" -eq 12 ]; then
+            # form feed
+            _je_result="${_je_result}\\f"
+        elif [ "$_je_dec" -eq 13 ]; then
+            # carriage return
+            _je_result="${_je_result}\\r"
+        elif [ "$_je_dec" -lt 32 ]; then
+            # Other control characters: emit \u00XX
+            _je_result="${_je_result}$(printf '\\u00%02x' "$_je_dec")"
+        else
+            # Every other byte is copied through unchanged: it is re-created
+            # from its octal value via a printf escape sequence.
+            _je_result="${_je_result}$(printf "\\$(printf '%03o' "$_je_dec")")"
+        fi
+    done
+    printf '%s' "$_je_result"
+}
+
+# collection_marker -- POST a begin/end marker to /api/collection
+# Args: $1=base_url  $2=type(begin|end)  $3=scan_id(optional)  $4=stats_json(optional)
+# Returns: scan_id from response (empty if unsupported/failed)
+# Exit status: 0 = success, 1 = connection/request failure
+# Globals: reads SOURCE_NAME, VERSION, INSECURE and CA_CERT
+# Notes: HTTP 404/501 is treated as "endpoint not implemented" and counts as
+#        success with an empty scan_id, for curl and wget alike. When neither
+#        curl nor wget is installed no request is sent and the exit status
+#        is 1.
+collection_marker() {
+    _cm_base_url="$1"
+    _cm_type="$2"
+    _cm_scan_id="${3:-}"
+    _cm_stats="${4:-}"
+    _cm_url="${_cm_base_url%/}/api/collection"
+    _cm_resp="$(mktemp_portable)" || return 1
+
+    # Assemble the JSON request body field by field; free-text values are
+    # passed through json_escape first.
+    _cm_safe_source="$(json_escape "$SOURCE_NAME")"
+    _cm_body="{\"type\":\"${_cm_type}\""
+    _cm_safe_hostname="$(json_escape "$(uname -n 2>/dev/null || echo unknown)")"
+    _cm_body="${_cm_body},\"source\":\"${_cm_safe_source}\""
+    _cm_body="${_cm_body},\"hostname\":\"${_cm_safe_hostname}\""
+    _cm_body="${_cm_body},\"collector\":\"ash/${VERSION}\""
+    _cm_body="${_cm_body},\"timestamp\":\"$(date -u '+%Y-%m-%dT%H:%M:%SZ' 2>/dev/null || date -u)\""
+    [ -n "$_cm_scan_id" ] && _cm_body="${_cm_body},\"scan_id\":\"${_cm_scan_id}\""
+    # $4 is spliced in verbatim, so it must already be a valid JSON member
+    [ -n "$_cm_stats"   ] && _cm_body="${_cm_body},${_cm_stats}"
+    _cm_body="${_cm_body}}"
+
+    _cm_ok=0
+    _cm_hdr="$(mktemp_portable)" || { rm -f "$_cm_resp"; return 1; }
+    : > "$_cm_resp" 2>/dev/null || true
+    if command -v curl >/dev/null 2>&1; then
+        # No --fail: curl exits 0 on HTTP errors, so the status code printed
+        # by -w can be inspected below.
+        set -- -sS -o "$_cm_resp" -D "$_cm_hdr" -w '%{http_code}' -H "Content-Type: application/json" -d "$_cm_body" --max-time 10
+        [ "$INSECURE" -eq 1 ] && set -- "$@" -k
+        [ -n "$CA_CERT" ] && set -- "$@" --cacert "$CA_CERT"
+        set -- "$@" "$_cm_url"
+        _cm_http_code="$(curl "$@" 2>/dev/null)"
+        _cm_curl_rc=$?
+        if [ "$_cm_curl_rc" -eq 0 ]; then
+            case "$_cm_http_code" in
+                2[0-9][0-9]) _cm_ok=1 ;;
+                404|501) log_msg warn "Collection marker '$_cm_type' not supported (HTTP $_cm_http_code) — server does not implement /api/collection"; _cm_ok=1 ;;
+                *) log_msg warn "Collection marker '$_cm_type' got HTTP $_cm_http_code" ;;
+            esac
+        fi
+    elif command -v wget >/dev/null 2>&1; then
+        set -- -O "$_cm_resp" -S --header "Content-Type: application/json" --post-data "$_cm_body" --timeout=10
+        [ "$INSECURE" -eq 1 ] && set -- "$@" --no-check-certificate
+        [ -n "$CA_CERT" ] && set -- "$@" "--ca-certificate=$CA_CERT"
+        set -- "$@" "$_cm_url"
+        wget "$@" 2>"$_cm_hdr"
+        _cm_wget_rc=$?
+        # wget -S writes the status lines to stderr; keep the last one seen
+        _cm_http_code="$(sed -n 's/.*HTTP\/[0-9.]*[[:space:]]*\([0-9][0-9][0-9]\).*/\1/p' "$_cm_hdr" 2>/dev/null | tail -1)"
+        case "$_cm_http_code" in
+            404|501)
+                # wget exits non-zero on HTTP error statuses, so "endpoint not
+                # implemented" has to be recognised regardless of its exit
+                # status; otherwise this case could never be reached and the
+                # wget path would behave differently from the curl path.
+                log_msg warn "Collection marker '$_cm_type' not supported (HTTP $_cm_http_code) — server does not implement /api/collection"
+                _cm_ok=1
+                ;;
+            *)
+                if [ "$_cm_wget_rc" -eq 0 ]; then
+                    case "$_cm_http_code" in
+                        # An unparsable status with a clean wget exit still counts as success
+                        2[0-9][0-9]|"") _cm_ok=1 ;;
+                        *) log_msg warn "Collection marker '$_cm_type' got HTTP $_cm_http_code" ;;
+                    esac
+                elif [ -n "$_cm_http_code" ]; then
+                    log_msg warn "Collection marker '$_cm_type' got HTTP $_cm_http_code (wget exit $_cm_wget_rc)"
+                fi
+                ;;
+        esac
+    fi
+    rm -f "$_cm_hdr"
+
+    # Extract scan_id value using a strict regex that only matches plain
+    # (unescaped) JSON string values containing safe characters.
+    # This avoids partial JSON unescaping bugs — if the server returns an
+    # escaped scan_id we simply won't match it, which is safe (we continue
+    # without a scan_id).
+    _cm_id="$(sed -n 's/.*"scan_id"[[:space:]]*:[[:space:]]*"\([A-Za-z0-9._:-]*\)".*/\1/p' "$_cm_resp" 2>/dev/null | head -1)"
+    rm -f "$_cm_resp"
+    printf '%s' "$_cm_id"
+    # The function's exit status reflects whether the request succeeded
+    [ "$_cm_ok" -eq 1 ]
+}
+
+submit_file() {
+    _sf_endpoint="$1"
+    _sf_filepath="$2"
+    _sf_filename="$_sf_filepath"
+    _sf_try=1
+    _sf_rc=1
+    _sf_wait=2
+    RETRY_AFTER=""
+
+    if [ "$DRY_RUN" -eq 1 ]; then
+        log_msg info "DRY-RUN: would submit '$_sf_filepath'"
+        return 0
+    fi
+
+    while [ "$_sf_try" -le "$RETRIES" ]; do
+        RETRY_AFTER=""
+
+        case "$UPLOAD_TOOL" in
+            curl)
+                upload_with_curl "$_sf_endpoint" "$_sf_filepath" "$_sf_filename"
+                _sf_rc=$? ;;
+            nc)
+                upload_with_nc "$_sf_endpoint" "$_sf_filepath" "$_sf_filename"
+                _sf_rc=$? ;;
+            *)
+                upload_with_wget "$_sf_endpoint" "$_sf_filepath" "$_sf_filename"
+                _sf_rc=$? ;;
+        esac
+
+        [ "$_sf_rc" -eq 0 ] && return 0
+
+        log_msg warn "Upload failed for '$_sf_filepath' (attempt ${_sf_try}/${RETRIES}, code ${_sf_rc})"
+        if [ "$_sf_try" -lt "$RETRIES" ]; then
+            # Use Retry-After from 503 if available, otherwise exponential backoff
+            if [ "$_sf_rc" -eq 103 ] && [ -n "$RETRY_AFTER" ]; then
+                log_msg info "Server requested Retry-After: ${RETRY_AFTER}s"
+                sleep "$RETRY_AFTER"
+            else
+                sleep "$_sf_wait"
+                _sf_wait=$((_sf_wait * 2))
+                [ "$_sf_wait" -gt 60 ] && _sf_wait=60
+            fi
+        fi
+        _sf_try=$((_sf_try + 1))
+    done
+
+    return "$_sf_rc"
+}
+
+parse_args() {
+    while [ $# -gt 0 ]; do
+        _pa_arg="$1"
+        case "$_pa_arg" in
+            -h|--help)
+                print_help
+                exit 0
+                ;;
+            -s|--server)
+                [ -n "$2" ] || die "Missing value for $_pa_arg"
+                THUNDERSTORM_SERVER="$2"
+                shift
+                ;;
+            -p|--port)
+                [ -n "$2" ] || die "Missing value for $_pa_arg"
+                THUNDERSTORM_PORT="$2"
+                shift
+                ;;
+            -d|--dir)
+                [ -n "$2" ] || die "Missing value for $_pa_arg"
+                if [ "$SCAN_DIRS_SET" -eq 0 ]; then
+                    SCAN_DIRS=""
+                    SCAN_DIRS_SET=1
+                fi
+                # Append to space-separated list (quote-safe for dirs without spaces)
+                # Dirs with spaces are handled via IFS manipulation during iteration
+                SCAN_DIRS="${SCAN_DIRS:+$SCAN_DIRS
+}$2"
+                shift
+                ;;
+            --max-age)
+                [ -n "$2" ] || die "Missing value for $_pa_arg"
+                MAX_AGE="$2"
+                shift
+                ;;
+            --max-size-kb)
+                [ -n "$2" ] || die "Missing value for $_pa_arg"
+                MAX_FILE_SIZE_KB="$2"
+                shift
+                ;;
+            --source)
+                [ -n "$2" ] || die "Missing value for $_pa_arg"
+                SOURCE_NAME="$2"
+                shift
+                ;;
+            --ssl)
+                USE_SSL=1
+                ;;
+            -k|--insecure)
+                INSECURE=1
+                ;;
+            --ca-cert)
+                [ -n "$2" ] || die "Missing value for $_pa_arg"
+                CA_CERT="$2"
+                shift
+                ;;
+            --sync)
+                ASYNC_MODE=0
+                ;;
+            --retries)
+                [ -n "$2" ] || die "Missing value for $_pa_arg"
+                RETRIES="$2"
+                shift
+                ;;
+            --dry-run)
+                DRY_RUN=1
+                ;;
+            --debug)
+                DEBUG=1
+                ;;
+            --log-file)
+                [ -n "$2" ] || die "Missing value for $_pa_arg"
+                LOGFILE="$2"
+                shift
+                ;;
+            --no-log-file)
+                LOG_TO_FILE=0
+                ;;
+            --syslog)
+                LOG_TO_SYSLOG=1
+                ;;
+            --quiet)
+                LOG_TO_CMDLINE=0
+                ;;
+            --progress)
+                PROGRESS=1
+                PROGRESS_SET=1
+                ;;
+            --no-progress)
+                PROGRESS=0
+                PROGRESS_SET=1
+                ;;
+            --)
+                shift
+                break
+                ;;
+            -*)
+                die "Unknown option: $_pa_arg (use --help)"
+                ;;
+            *)
+                # Positional args treated as additional directories
+                if [ "$SCAN_DIRS_SET" -eq 0 ]; then
+                    SCAN_DIRS=""
+                    SCAN_DIRS_SET=1
+                fi
+                SCAN_DIRS="${SCAN_DIRS:+$SCAN_DIRS
+}$_pa_arg"
+                ;;
+        esac
+        shift
+    done
+}
+
+# validate_config: sanity-check the settings gathered by parse_args and abort
+# via die with a descriptive message on the first violation.
+# Globals: reads THUNDERSTORM_PORT, MAX_AGE, MAX_FILE_SIZE_KB, RETRIES,
+#          THUNDERSTORM_SERVER and SCAN_DIRS
+validate_config() {
+    # Type checks first, so the numeric comparisons below only ever see integers
+    is_integer "$THUNDERSTORM_PORT"  || die "Port must be numeric: '$THUNDERSTORM_PORT'"
+    is_integer "$MAX_AGE"            || die "max-age must be numeric: '$MAX_AGE'"
+    is_integer "$MAX_FILE_SIZE_KB"   || die "max-size-kb must be numeric: '$MAX_FILE_SIZE_KB'"
+    is_integer "$RETRIES"            || die "retries must be numeric: '$RETRIES'"
+    [ "$THUNDERSTORM_PORT" -gt 0 ]   || die "Port must be greater than 0"
+    # TCP ports are 16-bit: reject values that could never be connected to
+    # instead of failing later with an opaque upload error
+    [ "$THUNDERSTORM_PORT" -le 65535 ] || die "Port must not be greater than 65535"
+    [ "$MAX_AGE" -ge 0 ]             || die "max-age must be >= 0"
+    [ "$MAX_FILE_SIZE_KB" -gt 0 ]    || die "max-size-kb must be > 0"
+    [ "$RETRIES" -gt 0 ]             || die "retries must be > 0"
+    [ -n "$THUNDERSTORM_SERVER" ]    || die "Server must not be empty"
+    [ -n "$SCAN_DIRS" ]              || die "At least one directory is required"
+}
+
+# main: entry point of the collector.
+# Parses the arguments, validates the configuration, announces the run to the
+# server (begin marker), walks every scan directory, submits each eligible
+# file and finally reports the run statistics (end marker).
+# Args:    the script's command-line arguments ("$@")
+# Returns: 0 when no upload failed, 1 when at least one upload failed
+main() {
+    _scheme="http"
+    _endpoint_name="check"
+    _query_source=""
+    _api_endpoint=""
+    _base_url=""
+    _SCAN_ID=""
+    _elapsed=0
+    _find_mtime=""
+    _results_file=""
+    _GLOBAL_BASE_URL=""
+    _GLOBAL_SCAN_ID=""
+
+    parse_args "$@"
+    # Passed to find as "-mtime -N" (modified less than N days ago).
+    # NOTE(review): validate_config accepts MAX_AGE=0, which yields "-mtime -0";
+    # that presumably matches no files at all -- confirm this is intended.
+    _find_mtime="-${MAX_AGE}"
+    detect_source_name
+    validate_config
+    print_banner
+
+    # id -u prints 0 for root; if id is unavailable assume non-root
+    if [ "$(id -u 2>/dev/null || echo 1)" != "0" ]; then
+        log_msg warn "Running without root privileges; some files may be inaccessible"
+    fi
+
+    [ "$USE_SSL"    -eq 1 ] && _scheme="https"
+    if [ -n "$CA_CERT" ]; then
+        [ -f "$CA_CERT" ] || die "CA certificate file not found: '$CA_CERT'"
+    fi
+    [ "$ASYNC_MODE" -eq 1 ] && _endpoint_name="checkAsync"
+
+    # Compose the upload URL: <scheme>://<server>:<port>/api/<check|checkAsync><query>
+    _query_source="$(build_query_source "$SOURCE_NAME")"
+    _base_url="${_scheme}://${THUNDERSTORM_SERVER}:${THUNDERSTORM_PORT}"
+    _api_endpoint="${_base_url}/api/${_endpoint_name}${_query_source}"
+    log_msg debug "Base URL: $_base_url"
+    log_msg debug "API endpoint: $_api_endpoint"
+
+    # An upload tool is mandatory for real runs but optional in dry-run mode
+    if [ "$DRY_RUN" -eq 0 ]; then
+        detect_upload_tool || die "Neither 'curl', 'wget', nor 'nc' is installed; unable to upload samples"
+    else
+        if detect_upload_tool; then
+            log_msg info "Dry-run mode active (upload tool detected: $UPLOAD_TOOL)"
+        else
+            log_msg info "Dry-run mode active (no upload tool required)"
+        fi
+    fi
+
+    log_msg info "Started Thunderstorm Collector (ash) - Version $VERSION"
+    log_msg info "Server: $THUNDERSTORM_SERVER"
+    log_msg info "Port: $THUNDERSTORM_PORT"
+    log_msg info "API endpoint: $_api_endpoint"
+    log_msg info "Max age (days): $MAX_AGE"
+    log_msg info "Max size (KB): $MAX_FILE_SIZE_KB"
+    log_msg info "Source: $SOURCE_NAME"
+    # SCAN_DIRS is newline-separated; flatten it to one line for the log
+    log_msg info "Folders: $(printf '%s' "$SCAN_DIRS" | tr '\n' ' ')"
+    [ "$DRY_RUN" -eq 1 ] && log_msg info "Dry-run mode enabled"
+
+    # TTY auto-detection for progress reporting
+    # (only when neither --progress nor --no-progress was given)
+    if [ "$PROGRESS_SET" -eq 0 ]; then
+        if [ -t 2 ]; then
+            PROGRESS=1
+        else
+            PROGRESS=0
+        fi
+    fi
+
+    # Store in globals for signal handler access
+    _GLOBAL_BASE_URL="$_base_url"
+    _GLOBAL_SCAN_ID=""
+
+    # Send collection begin marker; capture scan_id if server returns one
+    # Retry once after 2s on initial connection failure
+    if [ "$DRY_RUN" -eq 0 ]; then
+        _begin_ok=0
+        # collection_marker prints the scan_id on stdout; it is captured via a
+        # file (not a command substitution) so the function runs in this shell
+        _scan_id_file="$(mktemp_portable)" || die "Could not create temp file for scan_id"
+        TMP_FILES="${TMP_FILES} ${_scan_id_file}"
+        if collection_marker "$_base_url" "begin" "" "" > "$_scan_id_file"; then
+            _SCAN_ID="$(cat "$_scan_id_file")"
+            _begin_ok=1
+        fi
+        if [ "$_begin_ok" -eq 0 ]; then
+            log_msg warn "Begin marker failed; retrying in 2 seconds..."
+            sleep 2
+            if collection_marker "$_base_url" "begin" "" "" > "$_scan_id_file"; then
+                _SCAN_ID="$(cat "$_scan_id_file")"
+                _begin_ok=1
+            else
+                die "Cannot connect to Thunderstorm server at ${_base_url}/api/collection after retry"
+            fi
+        fi
+        rm -f "$_scan_id_file"
+        if [ -n "$_SCAN_ID" ]; then
+            log_msg info "Collection scan_id: $_SCAN_ID"
+            _GLOBAL_SCAN_ID="$_SCAN_ID"
+            # Check if endpoint already has query params
+            case "$_api_endpoint" in
+                *"?"*) _api_endpoint="${_api_endpoint}&scan_id=$(urlencode "$_SCAN_ID")" ;;
+                *)     _api_endpoint="${_api_endpoint}?scan_id=$(urlencode "$_SCAN_ID")" ;;
+            esac
+            log_msg debug "API endpoint (with scan_id): $_api_endpoint"
+        else
+            log_msg warn "Could not obtain scan_id from server; continuing without it"
+        fi
+    fi
+
+    # Write the newline-separated directory list to a temp file so the while
+    # loop runs in the current shell (not a subshell). A pipe would lose all
+    # counter increments (FILES_SCANNED etc.) due to POSIX subshell semantics.
+    _dirs_file="$(mktemp_portable)" || die "Could not create temp file for directory list"
+    TMP_FILES="${TMP_FILES} ${_dirs_file}"
+    printf '%s\n' "$SCAN_DIRS" > "$_dirs_file"
+
+    # fd 3 feeds the directory loop, fd 4 (below) the per-directory file loop;
+    # dedicated descriptors keep stdin untouched for the commands run inside
+    exec 3< "$_dirs_file"
+    while IFS= read -r _scandir <&3; do
+        [ "$INTERRUPTED" -eq 1 ] && break
+        [ -z "$_scandir" ] && continue
+
+        if [ ! -d "$_scandir" ]; then
+            log_msg warn "Skipping non-directory path '$_scandir'"
+            continue
+        fi
+
+        log_msg info "Scanning '$_scandir'"
+
+        _results_file="$(mktemp_portable)" || {
+            log_msg error "Could not create temporary file list for '$_scandir'"
+            continue
+        }
+        TMP_FILES="${TMP_FILES} ${_results_file}"
+
+        # Note: find without -print0 is safe for all filenames EXCEPT those
+        # containing literal newline characters (an extremely rare edge case).
+        # If your environment has such filenames, use thunderstorm-collector.sh
+        # (requires bash) which uses find -print0 + read -d ''.
+        # Build find exclusion arguments safely in a subshell to avoid
+        # clobbering positional parameters of the outer loop.
+        # The resulting find expression is:
+        #   find <dir> -path <excl1> -prune -o -path <excl2> -prune -o ... -type f -mtime <age> -print
+        # Each -prune -o short-circuits excluded paths; the final -type f -print
+        # matches only regular files in non-excluded subtrees.
+        (
+            set -- "$_scandir"
+            # Static exclusions: EXCLUDE_PATHS is split on whitespace on purpose
+            for _ep in $EXCLUDE_PATHS; do
+                [ -d "$_ep" ] && set -- "$@" -path "$_ep" -prune -o
+            done
+            # Dynamic exclusions: one mount point per line from get_excluded_mounts;
+            # skipped entirely if the temp file cannot be created
+            _mount_file="$(mktemp_portable)" || true
+            if [ -n "$_mount_file" ]; then
+                get_excluded_mounts > "$_mount_file"
+                while IFS= read -r _ep; do
+                    [ -n "$_ep" ] && [ -d "$_ep" ] && set -- "$@" -path "$_ep" -prune -o
+                done < "$_mount_file"
+                rm -f "$_mount_file"
+            fi
+            set -- "$@" -type f -mtime "$_find_mtime" -print
+            find "$@"
+        ) > "$_results_file" 2>/dev/null || true
+
+        # Count total lines for progress reporting
+        _total_in_dir="$(wc -l < "$_results_file" 2>/dev/null | tr -d ' \t')"
+        [ -z "$_total_in_dir" ] && _total_in_dir=0
+        _current_in_dir=0
+
+        exec 4< "$_results_file"
+        while IFS= read -r _file_path <&4; do
+            [ "$INTERRUPTED" -eq 1 ] && break
+            [ -z "$_file_path" ] && continue
+
+            _current_in_dir=$((_current_in_dir + 1))
+
+            # Progress reporting (based on lines consumed, not files processed)
+            if [ "$PROGRESS" -eq 1 ] && [ "$_total_in_dir" -gt 0 ]; then
+                _pct=$(( _current_in_dir * 100 / _total_in_dir ))
+                printf '\r[%d/%d] %d%% - %s' "$_current_in_dir" "$_total_in_dir" "$_pct" "$_scandir" >&2
+                PROGRESS_ACTIVE=1
+            fi
+
+            # The file may have vanished (or changed type) since find listed it
+            [ -f "$_file_path" ] || continue
+
+            FILES_SCANNED=$((FILES_SCANNED + 1))
+
+            # Skip files inside cloud storage folders
+            if is_cloud_path "$_file_path"; then
+                FILES_SKIPPED=$((FILES_SKIPPED + 1))
+                log_msg debug "Skipping cloud storage path '$_file_path'"
+                continue
+            fi
+
+            # A negative size is treated as "could not be read"
+            _size_kb="$(file_size_kb "$_file_path")"
+            if [ "$_size_kb" -lt 0 ]; then
+                FILES_SKIPPED=$((FILES_SKIPPED + 1))
+                log_msg debug "Skipping unreadable file '$_file_path'"
+                continue
+            fi
+
+            if [ "$_size_kb" -gt "$MAX_FILE_SIZE_KB" ]; then
+                FILES_SKIPPED=$((FILES_SKIPPED + 1))
+                log_msg debug "Skipping '$_file_path' due to size (${_size_kb}KB)"
+                continue
+            fi
+
+            log_msg debug "Submitting '$_file_path'"
+            if submit_file "$_api_endpoint" "$_file_path"; then
+                FILES_SUBMITTED=$((FILES_SUBMITTED + 1))
+            else
+                FILES_FAILED=$((FILES_FAILED + 1))
+                log_msg error "Could not upload '$_file_path'"
+            fi
+        done
+        exec 4<&-
+        # Clear progress line
+        if [ "$PROGRESS" -eq 1 ] && [ "$_total_in_dir" -gt 0 ]; then
+            printf '\r%80s\r' '' >&2
+            PROGRESS_ACTIVE=0
+        fi
+    done
+    exec 3<&-
+
+    # Elapsed wall-clock seconds; stays 0 when START_TS is not a positive number
+    if [ "$START_TS" -gt 0 ] 2>/dev/null; then
+        _elapsed=$(( $(date +%s 2>/dev/null || echo "$START_TS") - START_TS ))
+        [ "$_elapsed" -lt 0 ] && _elapsed=0
+    fi
+
+    log_msg info "Run completed: scanned=$FILES_SCANNED submitted=$FILES_SUBMITTED skipped=$FILES_SKIPPED failed=$FILES_FAILED seconds=$_elapsed"
+
+    # Send collection end marker with run statistics
+    # (best effort: the marker's exit status is not checked)
+    if [ "$DRY_RUN" -eq 0 ]; then
+        _stats="\"stats\":{\"scanned\":${FILES_SCANNED},\"submitted\":${FILES_SUBMITTED},\"skipped\":${FILES_SKIPPED},\"failed\":${FILES_FAILED},\"elapsed_seconds\":${_elapsed}}"
+        collection_marker "$_base_url" "end" "$_SCAN_ID" "$_stats" >/dev/null
+    fi
+
+    # Exit code: 0 = success, 1 = partial failure (some uploads failed)
+    if [ "$FILES_FAILED" -gt 0 ]; then
+        return 1
+    fi
+    return 0
+}
+
+main "$@"
+exit $?
diff --git a/scripts/thunderstorm-collector-ps2.ps1 b/scripts/thunderstorm-collector-ps2.ps1
new file mode 100644
index 0000000..eac6715
--- /dev/null
+++ b/scripts/thunderstorm-collector-ps2.ps1
@@ -0,0 +1,989 @@
+##################################################
+# Script Title: THOR Thunderstorm Collector (PS 2)
+# Script File Name: thunderstorm-collector-ps2.ps1
+# Author: Florian Roth
+# Version: 0.1.0
+# Date Created: 22.02.2026
+# Last Modified: 22.02.2026
+# Compatibility: PowerShell 2.0+
+##################################################
+
+<#
+    .SYNOPSIS
+        The Thunderstorm Collector collects and submits files to THOR Thunderstorm servers for analysis.
+        This version is compatible with PowerShell 2.0+ (uses System.Net.HttpWebRequest instead of Invoke-WebRequest).
+    .DESCRIPTION
+        The Thunderstorm collector processes a local directory (C:\ by default) and selects files for submission.
+        This selection is based on various filters. The filters include file size, age, extension and location.
+    .PARAMETER ThunderstormServer
+        Server name (FQDN) or IP address of your Thunderstorm instance
+    .PARAMETER ThunderstormPort
+        Port number on which the Thunderstorm service is listening (default: 8080)
+    .PARAMETER Source
+        Source of the submission (default: hostname of the system)
+    .PARAMETER Folder
+        Folder to process (default: C:\)
+    .PARAMETER MaxAge
+        Select files based on the number of days in which the file has been created or modified (default: 14 days)
+    .PARAMETER MaxSize
+        Maximum file size in MegaBytes for submission (default: 2MB / 2048KB)
+    .PARAMETER Extensions
+        Extensions to select for submission (default: preset list)
+    .PARAMETER UseSSL
+        Use HTTPS instead of HTTP for Thunderstorm communication
+    .PARAMETER Debugging
+        Show debug output for troubleshooting purposes
+    .EXAMPLE
+        powershell.exe -ExecutionPolicy Bypass -File thunderstorm-collector-ps2.ps1 -ThunderstormServer ts.local
+    .EXAMPLE
+        powershell.exe -ExecutionPolicy Bypass -File thunderstorm-collector-ps2.ps1 -ThunderstormServer ts.local -MaxAge 1 -UseSSL
+#>
+
+# #####################################################################
+# Parameters ----------------------------------------------------------
+# #####################################################################
+
+param(
+    [Parameter(HelpMessage='Server name (FQDN) or IP address of your Thunderstorm instance')]
+        [ValidateNotNullOrEmpty()]
+        [Alias('TS')]
+        [string]$ThunderstormServer,
+
+    [Parameter(HelpMessage='Port number on which the Thunderstorm service is listening (default: 8080)')]
+        [ValidateNotNullOrEmpty()]
+        [Alias('TP')]
+        [int]$ThunderstormPort = 8080,
+
+    [Parameter(HelpMessage='Source of the submission (default: hostname of the system)')]
+        [Alias('S')]
+        [string]$Source=$env:COMPUTERNAME,
+
+    [Parameter(HelpMessage='Folder to process (default: C:\)')]
+        [ValidateNotNullOrEmpty()]
+        [Alias('F')]
+        [string]$Folder = "C:\",
+
+    [Parameter(HelpMessage='Select files based on days since last modification (default: 14 days)')]
+        [ValidateNotNullOrEmpty()]
+        [Alias('MA')]
+        [int]$MaxAge = 14,
+
+    [Parameter(HelpMessage='Maximum file size in MegaBytes (default: 2MB / 2048KB)')]
+        [ValidateNotNullOrEmpty()]
+        [Alias('MS')]
+        [int]$MaxSize = 2,
+
+    [Parameter(HelpMessage='Extensions to select for submission')]
+        [ValidateNotNullOrEmpty()]
+        [Alias('E')]
+        [string[]]$Extensions,
+
+    [Parameter(HelpMessage='Submit all file extensions (overrides -Extensions)')]
+        [switch]$AllExtensions = $False,
+
+    [Parameter(HelpMessage='Use HTTPS instead of HTTP')]
+        [Alias('SSL')]
+        [switch]$UseSSL,
+
+    [Parameter(HelpMessage='Path to custom CA certificate bundle for TLS verification')]
+        [string]$CACert,
+
+    [Parameter(HelpMessage='Skip TLS certificate verification')]
+        [Alias('k')]
+        [switch]$Insecure,
+
+    [Parameter(HelpMessage='Force enable progress reporting')]
+        [switch]$Progress,
+
+    [Parameter(HelpMessage='Force disable progress reporting')]
+        [switch]$NoProgress,
+
+    [Parameter(HelpMessage='Enable debug output')]
+        [Alias('D')]
+        [switch]$Debugging
+)
+
+# Fixing Certain Platform Environments --------------------------------
+$AutoDetectPlatform = ""
+$OutputPath = $PSScriptRoot
+# When run via 'powershell -Command', $PSScriptRoot is empty; fall back to TEMP
+if ( -not $OutputPath -or $OutputPath -eq "" ) {
+    $OutputPath = $env:TEMP
+}
+$global:NoLog = $false
+
+# Microsoft Defender ATP - Live Response
+if ( $OutputPath -eq "" -or $OutputPath -like "*Advanced Threat Protection*" ) {
+    $AutoDetectPlatform = "MDATP"
+    if ( $OutputPath -eq "" ) {
+        $OutputPath = "$($env:ProgramData)\thor"
+    }
+}
+
+# #####################################################################
+# Presets -------------------------------------------------------------
+# #####################################################################
+
+# Maximum Size - apply default only when not explicitly passed
+if (-not $PSBoundParameters.ContainsKey('MaxSize')) {
+    [int]$MaxSize = 2
+}
+
+# Extensions
+# -AllExtensions overrides any -Extensions value
+# Note: PS 2.0 permanently binds parameter validation to $Extensions,
+# so we use a separate $ActiveExtensions variable for the working copy.
+if ($AllExtensions) {
+    [string[]]$ActiveExtensions = @()
+} elseif ($PSBoundParameters.ContainsKey('Extensions')) {
+    [string[]]$ActiveExtensions = $Extensions
+} else {
+    # Apply recommended preset only when no -Extensions parameter was explicitly passed
+    [string[]]$ActiveExtensions = @('.asp','.vbs','.ps','.ps1','.rar','.tmp','.bas','.bat','.chm','.cmd','.com','.cpl','.crt','.dll','.exe','.hta','.js','.lnk','.msc','.ocx','.pcd','.pif','.pot','.reg','.scr','.sct','.sys','.url','.vb','.vbe','.vbs','.wsc','.wsf','.wsh','.ct','.t','.input','.war','.jsp','.php','.asp','.aspx','.doc','.docx','.pdf','.xls','.xlsx','.ppt','.pptx','.tmp','.log','.dump','.pwd','.w','.txt','.conf','.cfg','.conf','.config','.psd1','.psm1','.ps1xml','.clixml','.psc1','.pssc','.pl','.www','.rdp','.jar','.docm','.ace','.job','.temp','.plg','.asm')
+}
+
+# Debug
+$Debug = $Debugging
+
+# Progress reporting: auto-detect TTY unless overridden
+$ShowProgress = $false
+if ($Progress) {
+    $ShowProgress = $true
+} elseif ($NoProgress) {
+    $ShowProgress = $false
+} else {
+    # Auto-detect: check if stdout is interactive (TTY)
+    try {
+        # First check if the environment is interactive at all
+        if (-not [Environment]::UserInteractive) {
+            $ShowProgress = $false
+        } else {
+            # Check if output is redirected (.NET 4.5+ only)
+            $isRedirected = $false
+            try {
+                $isRedirected = [Console]::IsOutputRedirected
+            } catch {
+                # Property not available in older .NET; fall back to host check
+                $isRedirected = $false
+            }
+            if ($isRedirected) {
+                $ShowProgress = $false
+            } else {
+                # Verify we have a real console window (not a non-interactive host)
+                $hostName = $Host.Name
+                if ($hostName -eq 'ConsoleHost') {
+                    $ShowProgress = [Console]::WindowWidth -gt 0
+                } else {
+                    # ISE, remoting, custom hosts -- no carriage-return progress
+                    $ShowProgress = $false
+                }
+            }
+        }
+    } catch {
+        $ShowProgress = $false
+    }
+}
+
+# Show Help -----------------------------------------------------------
+if ( $ThunderstormServer -eq "" ) {
+    Get-Help $MyInvocation.MyCommand.Definition -Detailed
+    Write-Host -ForegroundColor Yellow 'Note: You must at least define a Thunderstorm server (-ThunderstormServer)'
+    exit 2
+}
+
+# #####################################################################
+# Functions -----------------------------------------------------------
+# #####################################################################
+
+function Write-Log {
+    # Prints a message to the console and, unless $global:NoLog is set, appends it to the log file.
+    param (
+        [Parameter(Mandatory=$True, Position=0, HelpMessage="Log entry")]
+            [ValidateNotNullOrEmpty()]
+            [String]$Entry,
+
+        [Parameter(Position=1, HelpMessage="Log file to write into")]
+            [ValidateNotNullOrEmpty()]
+            [Alias('SS')]
+            [string]$LogFile = "thunderstorm-collector.log",
+
+        [Parameter(Position=3, HelpMessage="Level")]
+            [ValidateNotNullOrEmpty()]
+            [String]$Level = "Info"
+    )
+
+    # Debug entries are dropped entirely (console and log file) unless debugging is enabled
+    if ( $Level -eq "Debug" -and $Debug -eq $False ) {
+        return
+    }
+
+    # Map the level to its console prefix; any other level uses the default "[+]"
+    switch ( $Level ) {
+        "Warning"  { $Indicator = "[!]" }
+        "Error"    { $Indicator = "[E]" }
+        "Progress" { $Indicator = "[.]" }
+        "Note"     { $Indicator = "[i]" }
+        default    { $Indicator = "[+]" }
+    }
+    $Line = "$Indicator $Entry"
+
+    # Warnings use the warning stream, errors go to stderr, everything else to the host
+    switch ( $Level ) {
+        "Warning" { Write-Warning $Line }
+        "Error"   { [Console]::Error.WriteLine($Line) }
+        default   { Write-Host $Line }
+    }
+
+    # Append a timestamped copy to the log file unless logging is disabled
+    if ( $global:NoLog -ne $False ) { return }
+    try {
+        $LogFilePath = $LogFile
+        # Prefer the output directory when it exists; otherwise use $LogFile as given
+        if ($OutputPath -and (Test-Path $OutputPath -PathType Container)) {
+            $LogFilePath = Join-Path $OutputPath $LogFile
+        }
+        $Stamp = Get-Date -Format 'yyyy-MM-dd HH:mm:ss.fff'
+        "$Stamp $($env:COMPUTERNAME): $Entry" | Out-File -FilePath $LogFilePath -Append
+    } catch {
+        # A failing log write must never interrupt the collection run
+    }
+}
+
+# Submit-File: uploads a file using System.Net.HttpWebRequest (PS 2.0 compatible)
+# Streams file content directly from disk to avoid loading entire file into memory.
+# Returns the HTTP status code (int), 0 on connection failure, or -1 if the file cannot be opened.
+function Submit-File {
+    param(
+        [Parameter(Mandatory=$True)][string]$Url,
+        [Parameter(Mandatory=$True)][string]$FilePath,
+        [Parameter(Mandatory=$True)][long]$FileSize
+    )
+
+    $boundary = [System.Guid]::NewGuid().ToString()
+    $CRLF = "`r`n"
+
+    # Keep full client path in multipart filename for parity with other collectors.
+    # Note: -FileSize is not read below; Content-Length is derived from the opened stream.
+    $FileName = $FilePath
+    $EncodedFilename = [uri]::EscapeDataString($FileName)
+
+    # File part header and footer
+    # Use RFC 5987 encoding for filename to safely handle special characters
+    # Build ASCII-safe fallback filename: replace non-ASCII and control chars with underscores
+    $SafeAsciiFilename = ""
+    foreach ($ch in $FileName.ToCharArray()) {
+        $code = [int]$ch
+        if ($code -ge 0x20 -and $code -le 0x7E -and $ch -ne '"' -and $ch -ne '\') {
+            $SafeAsciiFilename += $ch
+        } else {
+            $SafeAsciiFilename += '_'
+        }
+    }
+    if ($SafeAsciiFilename -eq '') { $SafeAsciiFilename = 'upload' }
+    $fileHeaderText = "--$boundary$CRLF" +
+        "Content-Disposition: form-data; name=`"file`"; filename=`"$SafeAsciiFilename`"; filename*=UTF-8''$EncodedFilename$CRLF" +
+        "Content-Type: application/octet-stream$CRLF$CRLF"
+    $footerText = "$CRLF--$boundary--$CRLF"
+
+    $fileHeaderBytes = [System.Text.Encoding]::UTF8.GetBytes($fileHeaderText)
+    $footerBytes = [System.Text.Encoding]::UTF8.GetBytes($footerText)
+
+    $fileStream = $null
+    try {
+        # Open the file first to get authoritative size and fail fast if locked/missing
+        try {
+            $fileStream = [System.IO.File]::Open($FilePath, [System.IO.FileMode]::Open, [System.IO.FileAccess]::Read, [System.IO.FileShare]::ReadWrite)
+        } catch {
+            Write-Log "Cannot open file: $FilePath - $($_.Exception.Message)" -Level "Error"
+            return -1
+        }
+
+        $actualFileSize = $fileStream.Length
+        $contentLength = $fileHeaderBytes.Length + $actualFileSize + $footerBytes.Length
+
+        $request = [System.Net.HttpWebRequest]::Create($Url)
+        $request.Method = "POST"
+        $request.ContentType = "multipart/form-data; boundary=$boundary"
+        $request.ContentLength = $contentLength
+        $request.Timeout = 120000  # 120 seconds
+        $request.AllowAutoRedirect = $true
+        $request.AllowWriteStreamBuffering = $false
+        $request.Headers.Add("X-Hostname", $env:COMPUTERNAME)
+
+        # Stream the part header, file content and footer directly into the request stream
+        $stream = $null
+        try {
+            $stream = $request.GetRequestStream()
+            $stream.Write($fileHeaderBytes, 0, $fileHeaderBytes.Length)
+
+            $buffer = New-Object byte[] 65536
+            $totalBytesWritten = [long]0
+            $bytesRead = 0
+            do {
+                $bytesRead = $fileStream.Read($buffer, 0, $buffer.Length)
+                if ($bytesRead -gt 0) {
+                    # Clamp to declared size to prevent writing more than ContentLength
+                    $remaining = $actualFileSize - $totalBytesWritten
+                    if ($bytesRead -gt $remaining) { $bytesRead = [int]$remaining }
+                    if ($bytesRead -le 0) { break }
+                    $stream.Write($buffer, 0, $bytesRead)
+                    $totalBytesWritten += $bytesRead
+                }
+            } while ($bytesRead -gt 0 -and $totalBytesWritten -lt $actualFileSize)
+            # Release the file handle as soon as its content is sent (before waiting for the response)
+            $fileStream.Close()
+            $stream.Write($footerBytes, 0, $footerBytes.Length)
+        } finally {
+            if ($stream -ne $null) { $stream.Close() }
+        }
+
+        $response = $request.GetResponse()
+        $statusCode = [int]$response.StatusCode
+        $response.Close()
+        return $statusCode
+    }
+    catch [System.Net.WebException] {
+        $ex = $_.Exception
+        if ( $ex.Response -ne $null ) {
+            $errResponse = $ex.Response
+            $statusCode = [int]$errResponse.StatusCode
+
+            # Extract Retry-After header if present
+            $retryAfter = $errResponse.Headers["Retry-After"]
+            if ( $retryAfter -ne $null ) {
+                $script:LastRetryAfter = $retryAfter
+            }
+
+            $errResponse.Close()
+            return $statusCode
+        }
+        # No response at all (connection refused, DNS failure, etc.)
+        Write-Log "Connection error: $($ex.Message)" -Level "Error"
+        return 0
+    }
+    finally {
+        # Runs on every exit path, including a failed connect; closing an already-closed stream is a no-op
+        if ($fileStream -ne $null) { $fileStream.Close() }
+    }
+}
+
+# #####################################################################
+# Main Program --------------------------------------------------------
+# #####################################################################
+
+Write-Host "=============================================================="
+Write-Host "    ________                __            __                  "
+Write-Host "   /_  __/ /  __ _____  ___/ /__ _______ / /____  ______ _    "
+Write-Host "    / / / _ \/ // / _ \/ _  / -_) __(_--/ __/ _ \/ __/  ' \   "
+Write-Host "   /_/ /_//_/\_,_/_//_/\_,_/\__/_/ /___/\__/\___/_/ /_/_/_/   "
+Write-Host "                                                              "
+Write-Host "   Florian Roth, Nextron Systems GmbH, 2020-2026              "
+Write-Host "   PowerShell 2.0+ compatible version                         "
+Write-Host "                                                              "
+Write-Host "=============================================================="
+
+# Measure time
+$global:StartTime = Get-Date
+
+Write-Log "Started Thunderstorm Collector (PS2) with PowerShell v$($PSVersionTable.PSVersion)"
+
+# ---------------------------------------------------------------------
+# Evaluation ----------------------------------------------------------
+# ---------------------------------------------------------------------
+
+# Output Info on Auto-Detection
+if ( $AutoDetectPlatform -ne "" ) {
+    Write-Log "Auto Detect Platform: $($AutoDetectPlatform)"
+    Write-Log "Note: Some automatic changes have been applied"
+}
+
+# Validate folder exists
+if (-not (Test-Path -Path $Folder -PathType Container)) {
+    Write-Log "Folder not found: $Folder" -Level "Error"
+    exit 2
+}
+
+# TLS Configuration
+$Protocol = "http"
+if ( $UseSSL ) {
+    $Protocol = "https"
+    try {
+        # .NET 4.5+ enum values; TLS 1.2 = 3072, TLS 1.3 = 12288
+        [System.Net.ServicePointManager]::SecurityProtocol = 3072 -bor 12288
+    } catch {
+        try {
+            # Fall back to TLS 1.2 only
+            [System.Net.ServicePointManager]::SecurityProtocol = 3072
+        } catch {
+            Write-Log "WARNING: Could not set TLS 1.2. HTTPS may fail on this system." -Level "Warning"
+        }
+    }
+    # Reject conflicting TLS options
+    if ( $Insecure -and $CACert ) {
+        Write-Log "Cannot use both -Insecure and -CACert at the same time" -Level "Error"
+        exit 2
+    }
+    # Handle --insecure: skip certificate validation
+    if ( $Insecure ) {
+        [System.Net.ServicePointManager]::ServerCertificateValidationCallback = { $true }
+        Write-Log "TLS certificate verification DISABLED (insecure mode)" -Level "Warning"
+    }
+    # Handle --ca-cert: custom CA bundle (single cert or PEM bundle)
+    if ( $CACert ) {
+        if ( -not (Test-Path $CACert) ) {
+            Write-Log "CA certificate file not found: $CACert" -Level "Error"
+            exit 2
+        }
+        try {
+            # Try to load as a PEM bundle containing multiple certificates
+            $caCerts = New-Object System.Security.Cryptography.X509Certificates.X509Certificate2Collection
+            $pemContent = [System.IO.File]::ReadAllText($CACert)
+            $pemPattern = '-----BEGIN CERTIFICATE-----[^-]+-----END CERTIFICATE-----'
+            $pemMatches = [regex]::Matches($pemContent, $pemPattern)
+            if ($pemMatches.Count -gt 0) {
+                foreach ($pemMatch in $pemMatches) {
+                    $certText = $pemMatch.Value -replace '-----BEGIN CERTIFICATE-----', '' -replace '-----END CERTIFICATE-----', ''
+                    $certText = $certText.Trim()
+                    $certBytes = [Convert]::FromBase64String($certText)
+                    $cert = New-Object System.Security.Cryptography.X509Certificates.X509Certificate2(,$certBytes)
+                    $caCerts.Add($cert) | Out-Null
+                }
+                Write-Log "Loaded $($caCerts.Count) certificate(s) from CA bundle: $CACert"
+            } else {
+                # Try loading as a single DER/PFX certificate file
+                $cert = New-Object System.Security.Cryptography.X509Certificates.X509Certificate2($CACert)
+                $caCerts.Add($cert) | Out-Null
+                Write-Log "Loaded single CA certificate: $CACert"
+            }
+            if ($caCerts.Count -eq 0) {
+                Write-Log "No certificates found in CA file: $CACert" -Level "Error"
+                exit 2
+            }
+            # Accept the server certificate only if it matches the requested host name AND
+            # chains up to one of the certificates loaded from -CACert.
+            [System.Net.ServicePointManager]::ServerCertificateValidationCallback = {
+                param($sender, $certificate, $chain, $sslPolicyErrors)
+                # SslPolicyErrors bits: 1 = certificate not available, 2 = host name mismatch.
+                # Bit 4 (chain errors) is expected for a private CA and is re-checked below.
+                if (([int]$sslPolicyErrors) -band 3) { return $false }
+                # Build a chain using the provided CA certificates
+                $chainObj = New-Object System.Security.Cryptography.X509Certificates.X509Chain
+                foreach ($ca in $caCerts) {
+                    $chainObj.ChainPolicy.ExtraStore.Add($ca) | Out-Null
+                }
+                $chainObj.ChainPolicy.VerificationFlags = [System.Security.Cryptography.X509Certificates.X509VerificationFlags]::AllowUnknownCertificateAuthority
+                $chainObj.ChainPolicy.RevocationMode = [System.Security.Cryptography.X509Certificates.X509RevocationMode]::NoCheck
+                $valid = $chainObj.Build($certificate)
+                if (-not $valid) { return $false }
+                # Verify that the chain root is one of the supplied CA certificates
+                $rootThumbprint = $chainObj.ChainElements[$chainObj.ChainElements.Count - 1].Certificate.Thumbprint
+                foreach ($ca in $caCerts) {
+                    if ($ca.Thumbprint -eq $rootThumbprint) { return $true }
+                }
+                return $false
+            }
+        } catch {
+            Write-Log "Failed to load CA certificate: $_" -Level "Error"
+            exit 2
+        }
+    }
+    Write-Log "HTTPS mode enabled"
+}
+
+# URL Creation
+$SourceParam = ""
+if ( $Source -ne "" ) {
+    Write-Log "Using Source: $($Source)"
+    # URL-encode the source parameter
+    $EncodedSource = [uri]::EscapeDataString($Source)
+    $SourceParam = "?source=$EncodedSource"
+}
+$BaseUrl = "$($Protocol)://$($ThunderstormServer):$($ThunderstormPort)"
+$Url = "$BaseUrl/api/checkAsync$($SourceParam)"
+Write-Log "Sending to URI: $($Url)" -Level "Debug"
+$ScanId = ""
+
+# PS 2.0 compatible JSON escape helper -- single-pass over original string
+# Returns $s with the characters that may not appear raw inside a JSON string escaped:
+#   quote and backslash       -> \" and \\
+#   CR, LF, TAB, BS, FF       -> \r \n \t \b \f
+#   other chars below 0x20    -> \uXXXX (four upper-case hex digits)
+# All remaining characters are copied through unchanged.
+function Escape-JsonString {
+    param([string]$s)
+    if ($s -eq $null) { return "" }
+
+    $escaped = New-Object System.Text.StringBuilder
+    foreach ($ch in $s.ToCharArray()) {
+        # Dispatch on the numeric code unit of the current character
+        $cp = [int]$ch
+        if     ($cp -eq 0x22) { [void]$escaped.Append('\"') }
+        elseif ($cp -eq 0x5C) { [void]$escaped.Append('\\') }
+        elseif ($cp -eq 0x0D) { [void]$escaped.Append('\r') }
+        elseif ($cp -eq 0x0A) { [void]$escaped.Append('\n') }
+        elseif ($cp -eq 0x09) { [void]$escaped.Append('\t') }
+        elseif ($cp -eq 0x08) { [void]$escaped.Append('\b') }
+        elseif ($cp -eq 0x0C) { [void]$escaped.Append('\f') }
+        elseif ($cp -lt 0x20) { [void]$escaped.Append(('\u{0:X4}' -f $cp)) }
+        else                  { [void]$escaped.Append($ch) }
+    }
+
+    # Hand back the accumulated text as a plain string
+    return $escaped.ToString()
+}
+
+# PS 2.0 compatible: extract a JSON string value by key (handles escaped characters)
+function Get-JsonValue {
+    param([string]$Json, [string]$Key)
+    $pattern = '"' + [regex]::Escape($Key) + '"\s*:\s*"((?:\\.|[^"\\])*)"'
+    if ($Json -match $pattern) {
+        # Unescape JSON string escapes.
+        # Escaped backslashes (\\) are parked behind a placeholder first and restored last, after
+        # ALL other escapes (including \uXXXX), so "\\n" or "\\u0041" are never decoded twice.
+        $val = $matches[1]
+        $val = $val.Replace('\\', "`0BACKSLASH`0")
+        $val = $val.Replace('\"', '"')
+        $val = $val.Replace('\/', '/')
+        $val = $val.Replace('\n', "`n")
+        $val = $val.Replace('\r', "`r")
+        $val = $val.Replace('\t', "`t")
+        $val = $val.Replace('\b', "`b")
+        $val = $val.Replace('\f', [string][char]0x0C)
+        # Unescape \uXXXX sequences (including surrogate pairs)
+        $val = [regex]::Replace($val, '\\u([0-9a-fA-F]{4})(?:\\u([0-9a-fA-F]{4}))?', {
+            param($m)
+            $cp1 = [int]('0x' + $m.Groups[1].Value)
+            if ($m.Groups[2].Success) {
+                $cp2 = [int]('0x' + $m.Groups[2].Value)
+                # Check if this is a surrogate pair (high surrogate + low surrogate)
+                if ($cp1 -ge 0xD800 -and $cp1 -le 0xDBFF -and $cp2 -ge 0xDC00 -and $cp2 -le 0xDFFF) {
+                    return [char]::ConvertFromUtf32((($cp1 - 0xD800) * 0x400) + ($cp2 - 0xDC00) + 0x10000)
+                } else {
+                    # Not a surrogate pair: decode both code units independently (string concat, not char math)
+                    return ([string][char]$cp1) + [char]$cp2
+                }
+            } else {
+                # Single code unit - reject lone surrogates, decode normally
+                if ($cp1 -ge 0xD800 -and $cp1 -le 0xDFFF) {
+                    return $m.Value  # Leave lone surrogate escaped
+                }
+                return [char]$cp1
+            }
+        })
+        # Restore literal backslashes only now, so they cannot start a new escape sequence
+        $val = $val.Replace("`0BACKSLASH`0", '\')
+        return $val
+    }
+    return ""
+}
+
+function Send-CollectionMarker {
+    # Posts a collection marker (optionally with a scan_id and run statistics) to /api/collection.
+    # Returns the server's scan_id, "__NO_SCAN_ID__" (2xx response without a scan_id),
+    # "__MARKER_UNSUPPORTED__" (HTTP 404/501), or "" on any other failure.
+    param(
+        [string]$MarkerType,
+        [string]$ScanId = "",
+        [hashtable]$Stats = $null
+    )
+    $MarkerUrl = "$BaseUrl/api/collection"
+    $SourceVal = $Source
+    if (-not $SourceVal) { $SourceVal = $env:COMPUTERNAME }
+    $Timestamp = (Get-Date).ToUniversalTime().ToString("yyyy-MM-ddTHH:mm:ssZ")
+    # Build JSON manually for PS 2.0 compatibility
+    $JsonParts = New-Object System.Collections.ArrayList
+    $JsonParts.Add(('"type":"{0}"' -f (Escape-JsonString $MarkerType))) | Out-Null
+    $JsonParts.Add(('"source":"{0}"' -f (Escape-JsonString $SourceVal))) | Out-Null
+    $JsonParts.Add('"collector":"powershell2/1.0"') | Out-Null
+    $JsonParts.Add(('"timestamp":"{0}"' -f (Escape-JsonString $Timestamp))) | Out-Null
+    if ($ScanId) {
+        $JsonParts.Add(('"scan_id":"{0}"' -f (Escape-JsonString $ScanId))) | Out-Null
+    }
+    if ($Stats) {
+        $StatParts = New-Object System.Collections.ArrayList
+        foreach ($key in $Stats.Keys) {
+            $val = $Stats[$key]
+            if ($val -is [int] -or $val -is [long] -or $val -is [double]) {
+                # Invariant culture: JSON numbers need '.' as decimal separator on every locale
+                $StatParts.Add(('"' + (Escape-JsonString $key) + '":' + $val.ToString([System.Globalization.CultureInfo]::InvariantCulture))) | Out-Null
+            } else {
+                $StatParts.Add(('"' + (Escape-JsonString $key) + '":"' + (Escape-JsonString ([string]$val)) + '"')) | Out-Null
+            }
+        }
+        $JsonParts.Add(('"stats":{{{0}}}' -f ($StatParts -join ','))) | Out-Null
+    }
+    $JsonBody = '{' + ($JsonParts -join ',') + '}'
+    try {
+        $JsonBytes = [System.Text.Encoding]::UTF8.GetBytes($JsonBody)
+        $Req = [System.Net.HttpWebRequest]::Create($MarkerUrl)
+        $Req.Method = "POST"
+        $Req.ContentType = "application/json"
+        $Req.ContentLength = $JsonBytes.Length
+        $Req.Timeout = 10000
+        $Stream = $Req.GetRequestStream()
+        $Stream.Write($JsonBytes, 0, $JsonBytes.Length)
+        $Stream.Close()
+        $Resp = $Req.GetResponse()
+        $httpStatus = [int]$Resp.StatusCode
+        $Reader = New-Object System.IO.StreamReader($Resp.GetResponseStream())
+        $RespBody = $Reader.ReadToEnd()
+        $Reader.Close()
+        $Resp.Close()
+        # Validate HTTP success first, then attempt scan_id extraction
+        if ($httpStatus -lt 200 -or $httpStatus -ge 300) {
+            Write-Log "Collection marker '$MarkerType' returned unexpected HTTP $httpStatus" -Level "Error"
+            Write-Log "Response body: $RespBody" -Level "Debug"
+            return ""
+        }
+        $scanIdResult = Get-JsonValue -Json $RespBody -Key "scan_id"
+        if (-not $scanIdResult) {
+            Write-Log "Collection marker '$MarkerType' HTTP $httpStatus OK but no scan_id found in response" -Level "Warning"
+            Write-Log "Response body: $RespBody" -Level "Debug"
+            # Sentinel: tells the caller the server was reached (HTTP success) but sent no scan_id
+            return "__NO_SCAN_ID__"
+        }
+        return $scanIdResult
+    } catch [System.Net.WebException] {
+        $ex = $_.Exception
+        if ($ex.Response -ne $null) {
+            $errCode = [int]$ex.Response.StatusCode
+            # 404 or 501 means the server doesn't support collection markers -- continue without scan_id
+            if ($errCode -eq 404 -or $errCode -eq 501) {
+                $ex.Response.Close()
+                Write-Log "Collection marker '$MarkerType' not supported (HTTP $errCode) -- server does not implement /api/collection" -Level "Debug"
+                return "__MARKER_UNSUPPORTED__"
+            }
+            Write-Log "Collection marker '$MarkerType' failed with HTTP $errCode" -Level "Error"
+            try {
+                $errReader = New-Object System.IO.StreamReader($ex.Response.GetResponseStream())
+                $errBody = $errReader.ReadToEnd()
+                $errReader.Close()
+                Write-Log "Error response body: $errBody" -Level "Debug"
+            } catch {}
+            $ex.Response.Close()
+        } else {
+            Write-Log "Collection marker '$MarkerType' failed: $($ex.Message)" -Level "Error"
+        }
+        return ""
+    } catch {
+        Write-Log "Collection marker '$MarkerType' failed: $_" -Level "Error"
+        return ""
+    }
+}
+
+# ---------------------------------------------------------------------
+# Run THOR Thunderstorm Collector -------------------------------------
+# ---------------------------------------------------------------------
+
+$global:SubmittedCount = 0
+$global:ErrorCount = 0
+$global:ScannedCount = 0
+$global:SkippedCount = 0
+$global:MarkersSupported = $true
+
+# Send collection begin marker with single retry on failure
+$global:ScanId = Send-CollectionMarker -MarkerType "begin"
+if ($global:ScanId -eq "__MARKER_UNSUPPORTED__") {
+    $global:MarkersSupported = $false
+    $global:ScanId = ""
+} elseif (-not $global:ScanId) {
+    Write-Log "Begin marker failed - retrying in 2 seconds..." -Level "Warning"
+    Start-Sleep -Seconds 2
+    $global:ScanId = Send-CollectionMarker -MarkerType "begin"
+    if ($global:ScanId -eq "__MARKER_UNSUPPORTED__") {
+        $global:MarkersSupported = $false
+        $global:ScanId = ""
+    }
+}
+if (-not $global:MarkersSupported) {
+    Write-Log "Collection marker endpoint unavailable -- continuing without markers" -Level "Debug"
+} elseif (-not $global:ScanId) {
+    Write-Log "Could not connect to Thunderstorm server at $BaseUrl - exiting" -Level "Error"
+    exit 2
+}
+# Handle case where server responded OK but did not return a scan_id
+if ($global:ScanId -eq "__NO_SCAN_ID__") {
+    Write-Log "Begin marker succeeded but server did not return a scan_id -- continuing without scan_id" -Level "Warning"
+    $global:ScanId = ""
+}
+if ($global:ScanId) {
+    Write-Log "Collection scan_id: $($global:ScanId)"
+    # First parameter uses '?' so subsequent ones use '&'
+    if ($SourceParam -ne "") {
+        $Url = "$Url&scan_id=$([uri]::EscapeDataString($global:ScanId))"
+    } else {
+        $Url = "$Url`?scan_id=$([uri]::EscapeDataString($global:ScanId))"
+    }
+}
+
+# Signal handling: register handler to send interrupted marker on Ctrl+C / SIGTERM
+$global:Interrupted = $false
+$global:InterruptedMarkerSent = $false
+
+# Sends the "interrupted" collection marker at most once per run (no-op when markers are unsupported)
+function Send-InterruptedMarkerOnce {
+    if ((-not $global:MarkersSupported) -or $global:InterruptedMarkerSent) { return }
+    $global:InterruptedMarkerSent = $true
+    $global:Interrupted = $true
+    try {
+        Write-Log "Sending interrupted collection marker" -Level "Warning"
+        $runStats = @{
+            scanned         = $global:ScannedCount
+            submitted       = $global:SubmittedCount
+            skipped         = $global:SkippedCount
+            failed          = $global:ErrorCount
+            elapsed_seconds = [int]((Get-Date) - $global:StartTime).TotalSeconds
+        }
+        $null = Send-CollectionMarker -MarkerType "interrupted" -ScanId $global:ScanId -Stats $runStats
+    } catch {
+        # Best-effort: a failed marker must not get in the way of shutting down
+    }
+}
+
+
+# PS 2.0 compatible Ctrl+C handling: subscribe to the static [Console]::CancelKeyPress event via Register-ObjectEvent
+try {
+    [Console]::TreatControlCAsInput = $false  # Ctrl+C raises CancelKeyPress instead of being read as key input
+    Register-ObjectEvent -InputObject ([Console]) -EventName CancelKeyPress -Action {
+        $Event.SourceEventArgs.Cancel = $true  # request that the process keeps running
+        $global:Interrupted = $true  # polled by the file loop and the retry loop
+        Send-InterruptedMarkerOnce
+    } | Out-Null
+    Write-Log "Registered Ctrl+C handler via Register-ObjectEvent" -Level "Debug"
+} catch {
+    # Fallback when Register-ObjectEvent failed: attach a ConsoleCancelEventHandler delegate directly to the .NET event
+    try {
+        $handler = [System.ConsoleCancelEventHandler]{
+            param($sender, $e)
+            $e.Cancel = $true
+            $global:Interrupted = $true
+            Send-InterruptedMarkerOnce
+        }
+        [Console]::add_CancelKeyPress($handler)
+        Write-Log "Registered Ctrl+C handler via add_CancelKeyPress" -Level "Debug"
+    } catch {
+        Write-Log "Could not register Ctrl+C handler - interrupted markers on SIGINT not available" -Level "Debug"
+    }
+}
+
+# Note: PowerShell.Exiting fires on ALL exits (including normal completion),
+# so we do NOT register it -- it would incorrectly send an "interrupted" marker
+# on clean runs. SIGTERM handling in PS 2.0 is a known limitation.
+
+# Script-scope trap for catchable terminating errors: send the interrupted marker (at most once) before stopping
+trap {
+    Send-InterruptedMarkerOnce
+    break  # stop execution and let the error propagate
+}
+
+# PS 2 compatible file enumeration (Get-ChildItem -File not available in PS 2).
+# Directories are filtered out with PSIsContainer instead.
+# When progress is enabled, a count pass runs first so a percentage can be shown.
+# The count leaves out reparse points, which the main loop skips as well.
+Write-Log "Scanning files in $Folder ..."
+$TotalFiles = 0
+if ($ShowProgress) {
+    Write-Log "Counting files for progress reporting ..."
+    # Count pass: use Measure-Object to avoid storing all FileInfo objects
+    $countResult = Get-ChildItem -Path $Folder -Recurse -ErrorAction SilentlyContinue | Where-Object { -not $_.PSIsContainer -and -not ($_.Attributes -band [System.IO.FileAttributes]::ReparsePoint) } | Measure-Object
+    $TotalFiles = $countResult.Count
+    Write-Log "Found $TotalFiles files to evaluate in $Folder"
+}
+
+# Collect the candidate files and walk them through an enumerator so the main
+# loop can 'break' on interruption.
+# NOTE: the pipeline inside @(...) is fully materialized before iteration starts;
+# this is not a lazy/streaming directory walk.
+# The array sub-expression @(...) always yields an array - also when the pipeline
+# returns nothing (empty folder) or a single FileInfo - so GetEnumerator() is
+# always available and the tree is walked exactly once (no catch-and-retry pass).
+# Without @(...), a $null or scalar result has no GetEnumerator() method.
+# Terminating enumeration errors are deliberately not caught here, so that the
+# script-level trap can send the interrupted marker.
+# Reparse points stay in the list: the loop counts them as scanned, then skips them.
+$fileEnumerator = @(Get-ChildItem -Path $Folder -Recurse -ErrorAction SilentlyContinue | Where-Object { -not $_.PSIsContainer }).GetEnumerator()
+
+while ($fileEnumerator.MoveNext()) {
+    $file = $fileEnumerator.Current
+
+    # Stop processing further files once the interrupted flag has been set
+    if ($global:Interrupted) {
+        Write-Log "Interrupted by user signal" -Level "Warning"
+        break
+    }
+
+    # -----------------------------------------------------------------
+    # Filter ----------------------------------------------------------
+
+    $global:ScannedCount++  # every enumerated file counts as scanned, including those skipped below
+
+    # -----------------------------------------------------------------
+    # Progress --------------------------------------------------------
+    if ($ShowProgress -and $TotalFiles -gt 0) {
+        $Pct = [int](($global:ScannedCount / $TotalFiles) * 100)
+        if ($Pct -gt 100) { $Pct = 100 }  # scanned count can exceed the counted total; clamp the display
+        Write-Host -NoNewline ("`r[{0}/{1}] {2}%  " -f $global:ScannedCount, $TotalFiles, $Pct)
+    } elseif ($ShowProgress) {
+        # No total count available; show scanned count only
+        Write-Host -NoNewline ("`r[{0}] scanning...  " -f $global:ScannedCount)
+    }
+
+    # Symlink Check — skip symbolic links (security: prevent directory escape)
+    # PS 2.0 compatible: check Attributes for ReparsePoint flag
+    if ( $file.Attributes -band [System.IO.FileAttributes]::ReparsePoint ) {
+        Write-Log "$($file.Name) skipped (symbolic link)" -Level "Debug"
+        $global:SkippedCount++
+        continue
+    }
+
+    # Size Check: $MaxSize is compared against the file size expressed in MB
+    if ( ( $file.Length / 1MB ) -gt $MaxSize ) {
+        Write-Log "$($file.Name) skipped due to size filter" -Level "Debug"
+        $global:SkippedCount++
+        continue
+    }
+
+    # Age Check: only active when $MaxAge (days) is greater than 0
+    if ( $MaxAge -gt 0 ) {
+        if ( $file.LastWriteTime -lt (Get-Date).AddDays(-$MaxAge) ) {
+            Write-Log "$($file.Name) skipped due to age filter" -Level "Debug"
+            $global:SkippedCount++
+            continue
+        }
+    }
+
+    # Extensions Check: only active when at least one extension is configured
+    if ( $ActiveExtensions.Length -gt 0 ) {
+        $match = $false
+        foreach ( $ext in $ActiveExtensions ) {
+            if ( $file.Extension -eq $ext ) { $match = $true; break }
+        }
+        if ( -not $match ) {
+            Write-Log "$($file.Name) skipped due to extension filter" -Level "Debug"
+            $global:SkippedCount++
+            continue
+        }
+    }
+
+    # -----------------------------------------------------------------
+    # Submission ------------------------------------------------------
+
+    Write-Log "Processing $($file.FullName) ..." -Level "Debug"
+
+    # Submit with retry logic (file is streamed from disk, not loaded into memory)
+    $StatusCode = 0
+    $Retries = 0
+    $MaxRetries = 3
+    $Max503Retries = 10
+    $Retries503 = 0
+    $script:LastRetryAfter = $null
+    $FileSubmitted = $false  # NOTE(review): set below but not read anywhere in the visible part of the script
+    $FileRetryStart = Get-Date
+    $MaxRetrySeconds = 300  # Cap total retry time per file at 5 minutes
+
+    while ( $StatusCode -lt 200 -or $StatusCode -ge 300 ) {
+        if ($global:Interrupted) { break }  # the outer loop logs the interruption and stops
+        # Check total elapsed retry time for this file
+        if (((Get-Date) - $FileRetryStart).TotalSeconds -gt $MaxRetrySeconds) {
+            Write-Log "Total retry time exceeded ${MaxRetrySeconds}s - giving up on $($file.FullName)" -Level "Error"
+            $global:ErrorCount++
+            break
+        }
+
+        Write-Log "Submitting to Thunderstorm server: $($file.FullName) ..." -Level "Info"
+        $StatusCode = Submit-File -Url $Url -FilePath $file.FullName -FileSize $file.Length
+
+        if ( $StatusCode -ge 200 -and $StatusCode -lt 300 ) {
+            $global:SubmittedCount++
+            $FileSubmitted = $true
+            break
+        }
+        elseif ( $StatusCode -eq -1 ) {
+            # File could not be opened (missing, locked, permission denied) -- no retry
+            Write-Log "Skipping file due to open failure: $($file.FullName)" -Level "Error"
+            $global:ErrorCount++
+            break
+        }
+        elseif ( $StatusCode -eq 503 ) {
+            # 503 uses its own retry counter, separate from the connection/server-error counter
+            $Retries503++
+            if ( $Retries503 -ge $Max503Retries ) {
+                Write-Log "503: Server still busy after $Max503Retries retries - giving up on $($file.FullName)" -Level "Warning"
+                $global:ErrorCount++
+                break
+            }
+            $WaitSecs = 3
+            if ( $script:LastRetryAfter -ne $null ) {
+                try {
+                    $WaitSecs = [int]$script:LastRetryAfter
+                    if ($WaitSecs -lt 1) { $WaitSecs = 3 }
+                    if ($WaitSecs -gt 60) { $WaitSecs = 60 }
+                } catch { $WaitSecs = 3 }  # value not convertible to an integer: use the default wait
+            }
+            Write-Log "503: Server seems busy - retrying in $WaitSecs seconds ($Retries503/$Max503Retries)" -Level "Warning"
+            Start-Sleep -Seconds $WaitSecs
+        }
+        elseif ( $StatusCode -eq 0 ) {
+            # Connection failure
+            $Retries++
+            if ( $Retries -ge $MaxRetries ) {
+                Write-Log "Connection failed after $MaxRetries retries - giving up on $($file.FullName)" -Level "Error"
+                $global:ErrorCount++
+                break
+            }
+            $SleepTime = [int](2 * [Math]::Pow(2, $Retries - 1))  # exponential backoff: 2s, 4s, ...
+            Write-Log "Connection failed - retrying in $SleepTime seconds ($Retries/$MaxRetries)" -Level "Warning"
+            Start-Sleep -Seconds $SleepTime
+        }
+        else {
+            # Any other non-2xx status: retried with the same counter and backoff as connection failures
+            $Retries++
+            if ( $Retries -ge $MaxRetries ) {
+                Write-Log "$($StatusCode): Server error after $MaxRetries retries - giving up on $($file.FullName)" -Level "Error"
+                $global:ErrorCount++
+                break
+            }
+            $SleepTime = [int](2 * [Math]::Pow(2, $Retries - 1))
+            Write-Log "$($StatusCode): Server has problems - retrying in $SleepTime seconds ($Retries/$MaxRetries)" -Level "Warning"
+            Start-Sleep -Seconds $SleepTime
+        }
+    }
+}
+
+# Clear progress line if it was shown (both the percentage and the count-only form)
+if ($ShowProgress) {
+    Write-Host ("`r" + (" " * 60) + "`r") -NoNewline
+}
+
+# ---------------------------------------------------------------------
+# End -----------------------------------------------------------------
+# ---------------------------------------------------------------------
+$ElapsedTime = (Get-Date) - $global:StartTime
+$TotalTime = "{0:00}:{1:00}:{2:00}" -f ([int][Math]::Floor($ElapsedTime.TotalHours)), $ElapsedTime.Minutes, $ElapsedTime.Seconds  # hours do not wrap at 24
+Write-Log "Submitted $($global:SubmittedCount) files ($($global:ErrorCount) errors) in $TotalTime" -Level "Info"
+Write-Log "Results: scanned=$($global:ScannedCount) submitted=$($global:SubmittedCount) skipped=$($global:SkippedCount) failed=$($global:ErrorCount)"
+
+# Send collection end or interrupted marker with stats
+# If interrupted marker was already sent by signal handler, skip duplicate
+if (-not $global:MarkersSupported) {
+    Write-Log "Collection marker endpoint unavailable - skipping end/interrupted marker" -Level "Debug"
+} elseif ($global:InterruptedMarkerSent) {
+    Write-Log "Interrupted marker already sent by signal handler - skipping end marker"
+} else {
+    $EndMarkerType = "end"
+    if ($global:Interrupted) {
+        $EndMarkerType = "interrupted"
+        Write-Log "Sending interrupted collection marker" -Level "Warning"
+    }
+    Send-CollectionMarker -MarkerType $EndMarkerType -ScanId $global:ScanId -Stats @{
+        scanned         = $global:ScannedCount
+        submitted       = $global:SubmittedCount
+        skipped         = $global:SkippedCount
+        failed          = $global:ErrorCount
+        elapsed_seconds = [int]$ElapsedTime.TotalSeconds
+    } | Out-Null
+}
+
+# Exit codes: 0 = success, 1 = partial failure (at least one file failed), 2 = fatal error
+if ($global:ErrorCount -gt 0) {
+    exit 1
+} else {
+    exit 0
+}
diff --git a/scripts/thunderstorm-collector-py2.py b/scripts/thunderstorm-collector-py2.py
new file mode 100755
index 0000000..1a48783
--- /dev/null
+++ b/scripts/thunderstorm-collector-py2.py
@@ -0,0 +1,740 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# THOR Thunderstorm Collector - Python 2 version
+# Florian Roth, Nextron Systems GmbH, 2024
+#
+# Requires: Python 2.7
+# Use thunderstorm-collector.py for Python 3.4+
+#
+# stdlib only — no third-party dependencies.
+
+from __future__ import print_function
+
+import sys
+
+if sys.version_info[0] != 2:
+    sys.exit("[ERROR] This script requires Python 2.7. For Python 3, use thunderstorm-collector.py")
+
+import argparse
+import httplib
+import json
+import os
+import re
+import signal
+import socket
+import ssl
+import time
+import uuid
+from urllib import quote
+
+# Configuration
+schema = "http"
+max_age = 14  # in days
+max_size_kb = 2048  # in KB (harmonized with other implementations)
+sync_mode = False
+dry_run = False
+retries = 3
+skip_elements = [
+    r"^\/proc",
+    r"^\/mnt",
+    r"\.dat$",
+    r"\.npm",
+    r"\.vmdk$",
+    r"\.vswp$",
+    r"\.nvram$",
+    r"\.vmsd$",
+    r"\.lck$",
+]
+hard_skips = set(
+    os.path.normpath(p) for p in [
+        "/proc", "/dev", "/sys", "/run",
+        "/snap", "/.snapshots",
+        "/sys/kernel/debug", "/sys/kernel/slab", "/sys/kernel/tracing",
+    ]
+)
+
+NETWORK_FS_TYPES = set(["nfs", "nfs4", "cifs", "smbfs", "smb3", "sshfs", "fuse.sshfs",
+                        "afp", "webdav", "davfs2", "fuse.rclone", "fuse.s3fs"])
+SPECIAL_FS_TYPES = set(["proc", "procfs", "sysfs", "devtmpfs", "devpts",
+                        "cgroup", "cgroup2", "pstore", "bpf", "tracefs", "debugfs",
+                        "securityfs", "hugetlbfs", "mqueue", "autofs",
+                        "fusectl", "rpc_pipefs", "nsfs", "configfs", "binfmt_misc",
+                        "selinuxfs", "efivarfs", "ramfs"])
+CLOUD_DIR_NAMES = set(["onedrive", "dropbox", ".dropbox", "googledrive", "google drive",
+                       "icloud drive", "iclouddrive", "nextcloud", "owncloud", "mega",
+                       "megasync", "tresorit", "syncthing"])
+
+
+def get_excluded_mounts():
+    excluded = []
+    try:
+        with open("/proc/mounts", "r") as f:
+            for line in f:
+                parts = line.split()
+                if len(parts) >= 3:
+                    mount_point, fs_type = parts[1], parts[2]
+                    if fs_type in NETWORK_FS_TYPES or fs_type in SPECIAL_FS_TYPES:
+                        excluded.append(mount_point)
+    except (IOError, OSError):
+        pass
+    return excluded
+
+
+def is_cloud_path(filepath):
+    segments = filepath.replace("\\", "/").lower().split("/")
+    for seg in segments:
+        if seg in CLOUD_DIR_NAMES:
+            return True
+        if seg.startswith("onedrive - ") or seg.startswith("onedrive-") or seg.startswith("nextcloud-"):
+            return True
+    if "/library/cloudstorage" in filepath.lower():
+        return True
+    return False
+
+
+# Composed values
+current_date = time.time()
+
+# Stats
+num_submitted = 0
+num_processed = 0
+num_failed = 0
+
+# Path+query to use for submission (just the path portion, not full URL)
+api_endpoint = None
+
+# scan_id for collection markers
+scan_id = None
+
+# Whether we were interrupted
+interrupted = False
+
+# Placeholder for the parsed command-line arguments: class attributes supply defaults
+# so signal_handler won't crash if triggered before argparse runs
+class _DefaultArgs(object):
+    server = "localhost"
+    port = 8080  # same value as the --port default
+    tls = False
+    insecure = False
+    ca_cert = None
+    source = None  # None: callers fall back to socket.gethostname()
+    debug = False
+
+args = _DefaultArgs()  # replaced by parser.parse_args() in the main block
+
+# Progress reporting
+progress_enabled = None  # None = auto-detect TTY
+
+
+def make_connection(server, port, use_tls, insecure, ca_cert=None, timeout=30):
+    """Create an HTTP(S) connection with proper TLS settings."""
+    if use_tls:
+        if insecure:
+            if hasattr(ssl, '_create_unverified_context'):
+                context = ssl._create_unverified_context()
+            else:
+                context = None  # pre-2.7.9: no verification by default
+        else:
+            if hasattr(ssl, 'create_default_context'):
+                context = ssl.create_default_context()
+                if ca_cert:
+                    context.load_verify_locations(ca_cert)
+            else:
+                if ca_cert:
+                    print_stderr("[ERROR] Python runtime lacks ssl.create_default_context(); "
+                                 "cannot enforce --ca-cert verification.")
+                    sys.exit(2)
+                context = None  # pre-2.7.9: limited TLS, no SNI
+        if context is not None:
+            conn = httplib.HTTPSConnection(server, port, context=context, timeout=timeout)
+        else:
+            conn = httplib.HTTPSConnection(server, port, timeout=timeout)
+    else:
+        conn = httplib.HTTPConnection(server, port, timeout=timeout)
+    return conn
+
+
+def print_stderr(msg):
+    """Write msg plus a newline to stderr, blanking the progress line first when progress is on."""
+    if progress_enabled:
+        sys.stderr.write("\r" + " " * 80 + "\r")  # overwrite the in-place progress text with spaces
+    sys.stderr.write(msg + "\n")
+    sys.stderr.flush()
+
+
+def show_progress(current, filepath):
+    """Show progress indicator if enabled."""
+    if not progress_enabled:
+        return
+    # We don't know total ahead of time, so show count
+    display_path = filepath[-60:] if len(filepath) > 60 else filepath
+    try:
+        sys.stderr.write("\r[{0} scanned] Processing: {1} ...{2}".format(
+            current, display_path, " " * 10))
+        sys.stderr.flush()
+    except (UnicodeEncodeError, UnicodeDecodeError):
+        # Skip progress display for paths with encoding issues
+        pass
+
+
+def send_interrupted_marker():
+    """Send an interrupted collection marker with current stats."""
+    global interrupted
+    if interrupted:
+        return
+    interrupted = True
+    end_date = time.time()
+    elapsed = int(end_date - current_date)
+    collection_marker(
+        args.server, args.port, args.tls, args.insecure,
+        getattr(args, 'ca_cert', None),
+        args.source or socket.gethostname(), "0.1",
+        "interrupted",
+        scan_id=scan_id,
+        stats={
+            "scanned": num_processed,
+            "submitted": num_submitted,
+            "failed": num_failed,
+            "elapsed_seconds": elapsed,
+        }
+    )
+
+
+def signal_handler(signum, frame):
+    """Handle SIGINT/SIGTERM: send the interrupted marker, print a summary, exit with status 1."""
+    if interrupted:
+        # Already handling a signal; avoid re-entrance
+        sys.exit(1)
+    # Ignore further signals while we clean up
+    signal.signal(signal.SIGINT, signal.SIG_IGN)
+    signal.signal(signal.SIGTERM, signal.SIG_IGN)
+    sig_name = "SIGINT" if signum == signal.SIGINT else "SIGTERM"
+    print_stderr("\n[INFO] Received {}, sending interrupted marker...".format(sig_name))
+    try:
+        send_interrupted_marker()
+    except Exception as e:  # best-effort: a failed marker must not block the exit below
+        print_stderr("[ERROR] Failed to send interrupted marker: {}".format(e))
+    if progress_enabled:
+        sys.stderr.write("\n")  # move off the in-place progress line before the summary
+    print("Thunderstorm Collector Run interrupted (Checked: {} Submitted: {} Failed: {})".format(
+        num_processed, num_submitted, num_failed
+    ))
+    sys.exit(1)
+
+
+def process_dir(workdir):
+    global num_processed
+
+    # Skip if the workdir itself is in hard_skips
+    if os.path.normpath(workdir) in hard_skips:
+        if args.debug:
+            print("[DEBUG] Skipping hard-skipped directory {}".format(workdir))
+        return
+
+    for dirpath, dirnames, filenames in os.walk(workdir, followlinks=False):
+        # Hard skip directories (modify in-place to prevent descent)
+        filtered = []
+        for d in dirnames:
+            full = os.path.join(dirpath, d)
+            if os.path.normpath(full) in hard_skips:
+                continue
+            if os.path.islink(full):
+                continue
+            if is_cloud_path(full):
+                continue
+            filtered.append(d)
+        dirnames[:] = filtered
+
+        for name in filenames:
+            filepath = os.path.join(dirpath, name)
+
+            try:
+                # Skip symlinks
+                if os.path.islink(filepath):
+                    continue
+            except (OSError, IOError):
+                continue
+
+            if args.debug:
+                print("[DEBUG] Checking {} ...".format(filepath))
+
+            # Count
+            num_processed += 1
+
+            # Show progress
+            show_progress(num_processed, filepath)
+
+            # Skip files
+            if skip_file(filepath):
+                continue
+
+            # Submit
+            submit_sample(filepath)
+
+
+def skip_file(filepath):
+    """Return True when filepath must not be submitted (regex, unreadable, size or age filter)."""
+    for pattern in skip_elements:  # regex exclusions, searched in the full path
+        if re.search(pattern, filepath):
+            if args.debug:
+                print("[DEBUG] Skipping file due to configured skip_file exclusion {}".format(filepath))
+            return True
+
+    # Read size and mtime together; a file whose metadata cannot be read is skipped
+    try:
+        file_size = os.path.getsize(filepath)
+        mtime = os.path.getmtime(filepath)
+    except (OSError, IOError):
+        if args.debug:
+            print_stderr("[DEBUG] Skipping unreadable file {}".format(filepath))
+        return True
+
+    if file_size > max_size_kb * 1024:  # max_size_kb is in KB, file_size in bytes
+        if args.debug:
+            print("[DEBUG] Skipping file due to size {}".format(filepath))
+        return True
+
+    # Age (max_age=0 means no age filtering); current_date is the timestamp taken at module load
+    if max_age > 0 and mtime < current_date - (max_age * 86400):
+        if args.debug:
+            print("[DEBUG] Skipping file due to age {}".format(filepath))
+        return True
+
+    return False
+
+
+def submit_sample(filepath):
+    global num_submitted, num_failed
+
+    if dry_run:
+        print("[DRY-RUN] Would submit {} ...".format(filepath))
+        num_submitted += 1
+        return
+
+    print("[SUBMIT] Submitting {} ...".format(filepath))
+
+    if not api_endpoint:
+        print_stderr("[ERROR] API endpoint not configured; cannot submit.")
+        num_failed += 1
+        return
+
+    HARD_MAX_BYTES = 200 * 1024 * 1024
+
+    boundary = str(uuid.uuid4())
+
+    # Sanitize filename for multipart header safety.
+    # Keep full client path in multipart filename for parity with other collectors.
+    safe_filename = filepath
+    # Remove/replace characters unsafe for Content-Disposition header
+    for ch in ['"', ';', '\r', '\n', '\x00', '\t']:
+        safe_filename = safe_filename.replace(ch, '_')
+    # Ensure filename is not empty after sanitization
+    if not safe_filename or safe_filename.strip('.') == '':
+        safe_filename = 'unnamed_file'
+
+    hostname = socket.gethostname()
+    source = args.source or hostname
+
+    # Build multipart preamble and epilogue (metadata + file header/footer)
+    # In Python 2, keep everything as byte strings to avoid UnicodeDecodeError
+    # when hostname or filepath contains non-ASCII bytes.
+    boundary_bytes = boundary.encode('ascii') if isinstance(boundary, unicode) else boundary
+
+    def _form_field(name, value):
+        if isinstance(value, unicode):
+            value = value.encode('utf-8', 'replace')
+        elif not isinstance(value, bytes):
+            value = str(value)
+        part = b"--" + boundary_bytes + b"\r\n"
+        part += b"Content-Disposition: form-data; name=\"" + name.encode('ascii') + b"\"\r\n\r\n"
+        part += value + b"\r\n"
+        return part
+
+    preamble = b""
+
+    safe_filename_bytes = safe_filename.encode('utf-8', 'replace') if isinstance(safe_filename, unicode) else safe_filename
+    file_header = b"--" + boundary_bytes + b"\r\n"
+    file_header += b"Content-Disposition: form-data; name=\"file\"; filename=\"" + safe_filename_bytes + b"\"\r\n"
+    file_header += b"Content-Type: application/octet-stream\r\n\r\n"
+    preamble += file_header
+
+    epilogue = b"\r\n--" + boundary_bytes + b"--\r\n"
+
+    # Read entire file into memory (capped at HARD_MAX_BYTES) so we know the exact
+    # length before sending, avoiding Content-Length mismatches if the file changes.
+    try:
+        with open(filepath, "rb") as f:
+            file_data = f.read(HARD_MAX_BYTES + 1)
+    except (OSError, IOError) as e:
+        print_stderr("[ERROR] Could not read '{}' - {}".format(filepath, e))
+        num_failed += 1
+        return
+
+    if len(file_data) > HARD_MAX_BYTES:
+        print_stderr("[ERROR] File '{}' exceeds hard size limit (>{}B)".format(
+            filepath, HARD_MAX_BYTES))
+        num_failed += 1
+        return
+
+    if len(file_data) == 0:
+        if args.debug:
+            print("[DEBUG] Skipping empty file {}".format(filepath))
+        return
+
+    content_length = len(preamble) + len(file_data) + len(epilogue)
+
+    headers = {
+        "Content-Type": "multipart/form-data; boundary={}".format(boundary),
+        "Content-Length": str(content_length),
+    }
+
+    attempt = 0
+    max_retry_after = 300  # Cap Retry-After at 5 minutes
+    while attempt < retries:
+        conn = None
+        resp = None
+        try:
+            conn = make_connection(args.server, args.port, args.tls, args.insecure,
+                                   getattr(args, 'ca_cert', None))
+            conn.putrequest("POST", api_endpoint)
+            for hdr, val in headers.items():
+                conn.putheader(hdr, val)
+            conn.endheaders()
+
+            # Send: preamble
+            conn.send(preamble)
+
+            # Send: file data
+            conn.send(file_data)
+
+            # Send: epilogue
+            conn.send(epilogue)
+
+            resp = conn.getresponse()
+            resp.read()  # Drain response body to allow connection reuse
+
+        except Exception as e:
+            print_stderr("[ERROR] Could not submit '{}' - {}".format(filepath, e))
+            attempt += 1
+            if attempt < retries:
+                backoff = min(2 ** attempt, 60)
+                time.sleep(backoff)
+            continue
+        finally:
+            if conn is not None:
+                try:
+                    conn.close()
+                except Exception:
+                    pass
+
+        if resp is None:
+            attempt += 1
+            continue
+
+        if resp.status == 503:
+            attempt += 1
+            if attempt >= retries:
+                print_stderr("[ERROR] Server busy after {} attempts, giving up on '{}'".format(retries, filepath))
+                break
+            retry_after = resp.getheader("Retry-After", "30")
+            try:
+                retry_time = min(int(retry_after), max_retry_after)
+                if retry_time < 0:
+                    retry_time = 30
+            except (ValueError, TypeError):
+                retry_time = 30
+            print_stderr("[WARN] Server busy (503), retrying after {}s ...".format(retry_time))
+            time.sleep(retry_time)
+            continue
+        elif 200 <= resp.status < 300:
+            num_submitted += 1
+            return
+        else:
+            print_stderr("[ERROR] HTTP return status: {}, reason: {}".format(resp.status, resp.reason))
+            attempt += 1
+            if attempt < retries:
+                backoff = min(2 ** attempt, 60)
+                time.sleep(backoff)
+            continue
+
+    # All retries exhausted
+    num_failed += 1
+
+
+def collection_marker(server, port, use_tls, insecure, ca_cert, source, collector_version, marker_type, scan_id=None, stats=None):  # noqa: E501
+    """POST a begin/end/interrupted collection marker to /api/collection.
+    Returns a tuple (scan_id, success). scan_id may be None even on success.
+    For 'begin' markers, retries once after 2s on failure."""
+    body = {
+        "type": marker_type,
+        "source": source,
+        "hostname": socket.gethostname(),
+        "collector": "python2/{}".format(collector_version),
+        "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
+    }
+    if scan_id:
+        body["scan_id"] = scan_id
+    if stats:
+        body["stats"] = stats
+
+    max_attempts = 2 if marker_type == "begin" else 1
+
+    for attempt in range(max_attempts):
+        conn = None
+        try:
+            conn = make_connection(server, port, use_tls, insecure, ca_cert, timeout=10)
+            payload = json.dumps(body)
+            conn.request("POST", "/api/collection", body=payload,
+                         headers={"Content-Type": "application/json"})
+            resp = conn.getresponse()
+            resp_body = resp.read()
+        except Exception as e:
+            if attempt < max_attempts - 1:
+                print_stderr("[WARN] Collection marker '{}' failed: {}, retrying in 2s...".format(marker_type, e))
+                time.sleep(2)
+                continue
+            else:
+                print_stderr("[ERROR] Collection marker '{}' failed: {}".format(marker_type, e))
+                return (None, False)
+        finally:
+            if conn is not None:
+                try:
+                    conn.close()
+                except Exception:
+                    pass
+
+        if 200 <= resp.status < 300:
+            if resp_body and resp_body.strip():
+                try:
+                    data = json.loads(resp_body)
+                    return (data.get("scan_id"), True)
+                except (ValueError, TypeError):
+                    if marker_type == "begin":
+                        print_stderr("[WARN] Collection marker 'begin' returned non-JSON 200 response")
+                    return (None, True)
+            else:
+                return (None, True)
+        else:
+            if resp.status in (404, 501):
+                print_stderr("[WARN] Collection marker '{}' not supported (HTTP {}) — continuing without scan_id".format(
+                    marker_type, resp.status))
+                return ("", True)
+            print_stderr("[WARN] Collection marker '{}' returned HTTP {}".format(marker_type, resp.status))
+            if attempt < max_attempts - 1:
+                time.sleep(2)
+                continue
+            return (None, False)
+
+    return (None, False)  # should never reach here
+
+
+# Main
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        prog="thunderstorm-collector-py2.py",
+        description="Tool to collect files to send to THOR Thunderstorm (Python 2.7 version). Only uses standard library functions.",
+    )
+    parser.add_argument(
+        "-d", "--dirs",
+        nargs="+",
+        default=["/"],
+        help="Directories that should be scanned. (Default: /)",
+    )
+    parser.add_argument(
+        "-s", "--server",
+        required=True,
+        help="FQDN/IP of the THOR Thunderstorm server.",
+    )
+    parser.add_argument(
+        "-p", "--port",
+        type=int,
+        default=8080,
+        help="Port of the THOR Thunderstorm server. (Default: 8080)",
+    )
+    parser.add_argument(
+        "-t", "--tls",
+        action="store_true",
+        help="Use TLS to connect to the THOR Thunderstorm server.",
+    )
+    parser.add_argument(
+        "-k", "--insecure",
+        action="store_true",
+        help="Skip TLS verification and proceed without checking.",
+    )
+    parser.add_argument(
+        "-S", "--source",
+        default=None,
+        help="Source identifier to be used in the Thunderstorm submission. (Default: hostname)",
+    )
+    parser.add_argument(
+        "--max-age", type=int, default=14,
+        help="Max file age in days (default: 14)"
+    )
+    parser.add_argument(
+        "--max-size-kb", type=int, default=2048,
+        help="Max file size in KB (default: 2048)"
+    )
+    parser.add_argument(
+        "--sync", action="store_true",
+        help="Use /api/check (synchronous) instead of /api/checkAsync"
+    )
+    parser.add_argument(
+        "--dry-run", action="store_true",
+        help="Do not upload, only show what would be submitted"
+    )
+    parser.add_argument(
+        "--retries", type=int, default=3,
+        help="Retry attempts per file (default: 3)"
+    )
+    parser.add_argument(
+        "--ca-cert",
+        default=None,
+        help="Path to custom CA certificate bundle for TLS verification."
+    )
+    progress_group = parser.add_mutually_exclusive_group()
+    progress_group.add_argument(
+        "--progress",
+        action="store_true",
+        default=False,
+        help="Force enable progress reporting."
+    )
+    progress_group.add_argument(
+        "--no-progress",
+        action="store_true",
+        default=False,
+        help="Force disable progress reporting."
+    )
+    parser.add_argument("--debug", action="store_true", help="Enable debug logging.")
+
+    args = parser.parse_args()
+
+    # Apply parsed args to module-level config
+    max_age = args.max_age
+    max_size_kb = args.max_size_kb
+    dry_run = args.dry_run
+    retries = args.retries
+    sync_mode = args.sync
+
+    if max_age < 0:
+        print_stderr("[ERROR] --max-age must be non-negative")
+        sys.exit(2)
+    if max_size_kb <= 0:
+        print_stderr("[ERROR] --max-size-kb must be positive")
+        sys.exit(2)
+    if retries < 1:
+        print_stderr("[ERROR] --retries must be at least 1")
+        sys.exit(2)
+
+    if args.tls:
+        schema = "https"
+
+    # Validate --ca-cert
+    if args.ca_cert:
+        if not os.path.isfile(args.ca_cert):
+            print_stderr("[ERROR] CA certificate file not found: {}".format(args.ca_cert))
+            sys.exit(2)
+
+    # Determine progress reporting mode
+    if args.progress:
+        progress_enabled = True
+    elif args.no_progress:
+        progress_enabled = False
+    else:
+        progress_enabled = sys.stderr.isatty()
+
+    # Install signal handlers
+    signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGTERM, signal_handler)
+
+    # Build the API path+query (path only, not full URL — httplib needs just the path)
+    source_query = ""
+    if args.source:
+        source_query = "?source={}".format(quote(args.source, safe=''))
+
+    api_path = "/api/check" if sync_mode else "/api/checkAsync"
+    api_endpoint = "{}{}".format(api_path, source_query)
+
+    # Full URL for display only
+    display_url = "{}://{}:{}{}".format(schema, args.server, args.port, api_endpoint)
+
+    print("=" * 80)
+    print("   Python Thunderstorm Collector (Python 2)")
+    print("   Florian Roth, Nextron Systems GmbH, 2024")
+    print()
+    print("=" * 80)
+    print("Target Directory: {}".format(", ".join(args.dirs)))
+    # Extend hard_skips with mount points of network/special filesystems
+    for mp in get_excluded_mounts():
+        norm_mp = os.path.normpath(mp)
+        hard_skips.add(norm_mp)
+
+    print("Thunderstorm Server: {}".format(args.server))
+    print("Thunderstorm Port: {}".format(args.port))
+    print("Using API Endpoint: {}".format(display_url))
+    print("Maximum Age of Files: {} days".format(max_age))
+    print("Maximum File Size: {} KB".format(max_size_kb))
+    sorted_skips = sorted(hard_skips)
+    print("Excluded directories: {}".format(", ".join(sorted_skips[:10]) + (" ..." if len(sorted_skips) > 10 else "")))
+    if args.source:
+        print("Source Identifier: {}".format(args.source))
+    print()
+
+    print("Starting the walk at: {} ...".format(", ".join(args.dirs)))
+
+    # Send collection begin marker (with single retry on failure)
+    scan_id, begin_success = collection_marker(
+        args.server, args.port, args.tls, args.insecure,
+        args.ca_cert,
+        args.source or socket.gethostname(), "0.1",
+        "begin"
+    )
+    if not begin_success:
+        print_stderr("[ERROR] Failed to establish collection session with server {}:{}. Exiting.".format(
+            args.server, args.port))
+        sys.exit(2)
+    if scan_id:
+        print("[INFO] Collection scan_id: {}".format(scan_id))
+        # Append scan_id to the endpoint (URL-encoded)
+        separator = "&" if "?" in api_endpoint else "?"
+        api_endpoint = "{}{}scan_id={}".format(api_endpoint, separator, quote(str(scan_id), safe=''))
+
+    for walkdir in args.dirs:
+        if not os.path.isdir(walkdir):
+            print_stderr("[WARN] Directory does not exist or is not accessible: {}".format(walkdir))
+            continue
+        process_dir(walkdir)
+
+    # Clear progress line if needed
+    if progress_enabled:
+        sys.stderr.write("\r" + " " * 80 + "\r")
+        sys.stderr.flush()
+
+    # Send collection end marker with stats
+    end_date = time.time()
+    elapsed = int(end_date - current_date)
+    minutes = elapsed // 60
+    _end_scan_id, _end_ok = collection_marker(
+        args.server, args.port, args.tls, args.insecure,
+        args.ca_cert,
+        args.source or socket.gethostname(), "0.1",
+        "end",
+        scan_id=scan_id,
+        stats={
+            "scanned": num_processed,
+            "submitted": num_submitted,
+            "failed": num_failed,
+            "elapsed_seconds": elapsed,
+        }
+    )
+    if not _end_ok:
+        print_stderr("[WARN] Failed to send collection end marker")
+
+    print("Thunderstorm Collector Run finished (Checked: {} Submitted: {} Failed: {} Minutes: {})".format(
+        num_processed, num_submitted, num_failed, minutes
+    ))
+
+    # Exit codes: 0 = success, 1 = partial failure, 2 = fatal error
+    if num_failed > 0:
+        sys.exit(1)
+    sys.exit(0)
diff --git a/scripts/thunderstorm-collector.bat b/scripts/thunderstorm-collector.bat
index 54d6294..060403c 100644
--- a/scripts/thunderstorm-collector.bat
+++ b/scripts/thunderstorm-collector.bat
@@ -4,50 +4,96 @@ SETLOCAL EnableDelayedExpansion
 :: ----------------------------------------------------------------
 :: THOR Thunderstorm Collector
 :: Windows Batch
-:: Florian Roth
-:: v0.4
+:: Florian Roth, Nextron Systems GmbH
+:: v0.5
 ::
-:: A Windows Batch script that uses a compiled Curl for Windows
+:: A Windows Batch script that uses Curl for Windows
 :: to upload files to a THOR Thunderstorm server
 ::
 :: Requirements:
-:: Curl for Windows (place ./bin/curl.exe from the package into the script folder)
-:: https://curl.haxx.se/windows/
+:: Curl for Windows (place curl.exe into the script folder or PATH)
+:: https://curl.se/windows/
 ::
-:: Note on Windows 10
-:: Windows 10 already includes a curl since build 17063, so all versions newer than
-:: version 1709 (Redstone 3) from October 2017 already meet the requirements
+:: Note on Windows 10+
+:: Windows 10 has shipped curl.exe since build 17063, i.e. version 1803 and later
 ::
-:: Note on very old Windows versions:
-:: The last version of curl that works with Windows 7 / Windows 2008 R2
-:: and earlier is v7.46.0 and can be still be downloaded from here:
-:: https://bintray.com/vszakats/generic/download_file?file_path=curl-7.46.0-win32-mingw.7z
+:: Note on Windows 7 / Server 2008 R2:
+:: Curl 8.x requires the Universal C Runtime (KB2999226 or KB3118401).
+:: Install the Visual C++ 2015 Redistributable or the UCRT update,
+:: then place the curl.exe + libcurl DLL in the script folder.
+::
+:: Known Limitations (cmd.exe platform constraints):
+:: - No collection markers: begin/end markers and scan_id tracking require
+::   JSON parsing which is impractical in pure batch. Use the PowerShell
+::   collector (.ps1 or .ps2.ps1) for collection marker support.
+:: - No --ca-cert / --insecure support: Use CURL_CA_BUNDLE env var or
+::   URL_SCHEME=http as workarounds.
+:: - No progress reporting: cmd.exe cannot detect interactive terminals.
+:: - No signal handling: Ctrl+C terminates without cleanup.
+:: - MAX_AGE filtering: FORFILES /D -N has inverted semantics (files ≥N days
+::   OLD, not files from last N days). This script applies age filtering
+::   per-file in PROCESSFILE as a workaround.
+:: - FINDSTR regex: Windows 7 has limited regex support ($ anchors and
+::   negated character classes [^...] are broken). Hostname validation
+::   provides defense-in-depth; server-side validation is authoritative.
+:: ----------------------------------------------------------------
 
 :: CONFIGURATION -------------------------------------------------
 
-:: THUNDERSTORM SERVER -------------------------------------------
-:: The thunderstorm server host name (fqdn) or IP
-SET THUNDERSTORM_SERVER=ygdrasil.nextron
-SET THUNDERSTORM_PORT=8080
-:: Use http or https
-SET URL_SCHEME=http
+:: THUNDERSTORM SERVER
+SET _TS=%THUNDERSTORM_SERVER%
+SET _TP=%THUNDERSTORM_PORT%
+SET _SCHEME=%URL_SCHEME%
+IF "%_TS%"=="" SET _TS=ygdrasil.nextron
+IF "%_TP%"=="" SET _TP=8080
+IF "%_SCHEME%"=="" SET _SCHEME=http
+IF /I NOT "%_SCHEME%"=="http" IF /I NOT "%_SCHEME%"=="https" (
+    ECHO [ERROR] Invalid URL_SCHEME: %_SCHEME%. Must be http or https. 1>&2
+    EXIT /b 2
+)
+
+:: SELECTION
+SET _DIRS=%COLLECT_DIRS%
+SET _EXTS=%RELEVANT_EXTENSIONS%
+SET _MAXSZ=%COLLECT_MAX_SIZE%
+SET _MAXAGE=%MAX_AGE%
+IF "%_DIRS%"=="" SET "_DIRS=C:\Users;C:\Temp;C:\Windows"
+IF "%_EXTS%"=="" SET _EXTS=.vbs .ps1 .rar .tmp .bat .chm .dll .exe .hta .js .lnk .sct .war .jsp .jspx .php .asp .aspx .log .dmp .txt .jar .job
+IF "%_MAXSZ%"=="" SET _MAXSZ=3000000
+IF "%_MAXAGE%"=="" SET _MAXAGE=30
 
-:: SELECTION -----------------------------------------------------
+:: DEBUG & SOURCE
+SET _DBG=%DEBUG%
+SET _SRC=%SOURCE%
+IF "%_DBG%"=="" SET _DBG=0
 
-:: The directory that should be walked
-SET COLLECT_DIRS=C:\Users C:\Temp C:\Windows
-:: The pattern of files to include
-SET RELEVANT_EXTENSIONS=.vbs .ps .ps1 .rar .tmp .bat .chm .dll .exe .hta .js .lnk .sct .war .jsp .jspx .php .asp .aspx .log .dmp .txt .jar .job
-:: Maximum file size to collect (in bytes) (defualt: 3MB)
-SET /A COLLECT_MAX_SIZE=3000000
-:: Maximum file age in days (default: 7300 days = 20 years)
-SET /A MAX_AGE=30
+:: Basic server hostname validation: reject empty and values containing characters
+:: outside the allowed set (alphanumeric, hyphens, dots, colons, brackets for IPv6).
+:: Full URL validation is delegated to curl.
+IF "!_TS!"=="" (
+    ECHO [ERROR] Server hostname is empty. Set THUNDERSTORM_SERVER. 1>&2
+    EXIT /b 2
+)
+ECHO !_TS!| FINDSTR /R "[^a-zA-Z0-9.\-\[\]:]" >nul 2>&1
+IF NOT ERRORLEVEL 1 (
+    ECHO [ERROR] Server hostname contains invalid characters: !_TS! 1>&2
+    EXIT /b 2
+)
 
-:: Debug
-SET DEBUG=0
+:: Validate numeric parameters
+SET /A _TP=%_TP% 2>nul
+SET /A _MAXSZ=%_MAXSZ% 2>nul
+SET /A _MAXAGE=%_MAXAGE% 2>nul
+IF !_TP! LEQ 0 SET _TP=8080
+IF !_TP! GTR 65535 SET _TP=8080
+IF !_MAXSZ! LEQ 0 SET _MAXSZ=3000000
+IF !_MAXAGE! LSS 0 SET _MAXAGE=30
 
-:: Source
-SET SOURCE=
+:: Counters
+SET /A _SUBMITTED=0
+SET /A _SKIPPED=0
+SET /A _FAILED=0
+SET /A _SCANNED=0
 
 :: WELCOME -------------------------------------------------------
 
@@ -57,89 +103,333 @@ ECHO   /_  __/ /  __ _____  ___/ /__ _______ / /____  ______ _
 ECHO    / / / _ \/ // / _ \/ _  / -_) __(_--/ __/ _ \/ __/  ' \
 ECHO   /_/ /_//_/\_,_/_//_/\_,_/\__/_/ /___/\__/\___/_/ /_/_/_/
 ECHO.
-ECHO   Windows Batch Collector
-ECHO   Florian Roth, 2020
+ECHO   Windows Batch Collector v0.5
+ECHO   Florian Roth, Nextron Systems GmbH, 2020-2026
 ECHO.
 ECHO =============================================================
 ECHO.
 
-:: REQUIREMENTS -------------------------------------------------
-:: CURL in PATH
+:: REQUIREMENTS --------------------------------------------------
+:: Prefer curl next to the script (bundled with UCRT DLLs), then current dir, then PATH
+SET _CURL=
+IF EXIST "%~dp0curl.exe" (
+    SET "_CURL=%~dp0curl.exe"
+    GOTO :CURLOK
+)
+IF EXIST "%CD%\curl.exe" (
+    SET "_CURL=%CD%\curl.exe"
+    GOTO :CURLOK
+)
 where /q curl.exe
 IF NOT ERRORLEVEL 1 (
-    GOTO CHECKDONE
+    FOR /F "tokens=*" %%C IN ('where curl.exe') DO (
+        IF NOT DEFINED _CURL SET "_CURL=%%C"
+    )
+    GOTO :CURLOK
 )
-:: CURL in current directory
-IF EXIST %CD%\curl.exe (
-    GOTO CHECKDONE
+ECHO [ERROR] Cannot find curl in PATH or the script directory. 1>&2
+ECHO     Download from https://curl.se/windows/ and place curl.exe next to this script. 1>&2
+EXIT /b 2
+:CURLOK
+ECHO [+] Curl found: %_CURL%
+
+:: SOURCE --------------------------------------------------------
+IF "%_SRC%"=="" (
+    FOR /F "tokens=*" %%i IN ('hostname') DO SET _SRC=%%i
+    ECHO [+] Source: !_SRC!
 )
-ECHO Cannot find curl in PATH or the current directory. Download it from https://curl.haxx.se/windows/ and place curl.exe from the ./bin sub folder into the collector script folder.
-ECHO If you're collecting on Windows systems older than Windows Vista, use curl version 7.46.0 from https://bintray.com/vszakats/generic/download_file?file_path=curl-7.46.0-win32-mingw.7z
-EXIT /b 1
-:CHECKDONE
-ECHO Curl has been found. We're ready to go.
 
-:: COLLECTION --------------------------------------------------
+:: Create temp files for file listing and curl responses
+SET "_FILELIST=%TEMP%\ts-collector-%RANDOM%%RANDOM%.tmp"
+SET "_RESPTMP=%TEMP%\ts-collector-resp-%RANDOM%%RANDOM%.tmp"
+IF EXIST "!_FILELIST!" DEL "!_FILELIST!" 2>nul
+IF EXIST "!_RESPTMP!" DEL "!_RESPTMP!" 2>nul
+
+:: URL-encode the source for the query string; the percent sign goes first so later escapes survive.
+:: NOTE(review): cmd substring substitution cannot match '=', so the last line is likely a no-op - verify.
+SET "_SRCURL=!_SRC!"
+SET "_SRCURL=!_SRCURL:%%=%%25!"
+SET "_SRCURL=!_SRCURL: =%%20!"
+SET "_SRCURL=!_SRCURL:&=%%26!"
+SET "_SRCURL=!_SRCURL:+=%%2B!"
+SET "_SRCURL=!_SRCURL:#=%%23!"
+SET "_SRCURL=!_SRCURL:==%%3D!"
+
+:: NOTE: Collection markers (begin/end) and scan_id tracking are not
+:: supported in the batch collector. Use the PowerShell collector
+:: (.ps1 or .ps2.ps1) for collection marker support.
+SET _IDPARAM=
+
+:: BUILD FILE LIST -----------------------------------------------
+:: Phase 1: Use FORFILES to generate a filtered file list.
+:: FORFILES does NOT follow junctions/reparse points, solving the infinite loop issue.
 
-:: SOURCE
-IF "%SOURCE%"=="" (
-    FOR /F "tokens=*" %%i IN ('hostname') DO SET SOURCE=%%i
-    ECHO No Source provided, using hostname=!SOURCE!
+:: NOTE: Age filtering is NOT performed in the FORFILES phase because
+:: FORFILES /D -N has INVERTED semantics: it means "files modified ON OR BEFORE
+:: N days ago" (old files), not "files from the last N days". Age filtering
+:: is handled during file iteration in PROCESSFILE instead.
+:: See: https://ss64.com/nt/forfiles.html - "/D -dd selects files with a
+:: last modified date less than or equal to the current date minus dd days."
+
+ECHO [+] Scanning !_DIRS! ...
+ECHO [+] Filters: MAX_SIZE=%_MAXSZ% bytes, MAX_AGE=%_MAXAGE% days, EXTENSIONS=%_EXTS%
+:: NOTE: MAX_AGE is applied per file in PROCESSFILE (not in FORFILES /D).
+
+:: Iterate directories using semicolon delimiter (supports paths with spaces)
+:: COLLECT_DIRS can be semicolon-separated, e.g. "C:\Program Files;C:\Temp"
+:: Write directory list to a temp file, then iterate with delayed expansion off
+:: to protect paths containing '!' characters.
+SET "_DIRLIST=!_FILELIST!.dirs"
+:: Split semicolon-separated directory list into lines
+FOR %%T IN ("!_DIRS:;=" "!") DO (
+    IF NOT "%%~T"=="" ECHO %%~T>>"!_DIRLIST!"
 )
-IF "%SOURCE%" NEQ "" (
-    SET SOURCE=?source=%SOURCE%
+FOR /F "usebackq delims=" %%T IN ("!_DIRLIST!") DO (
+    CALL :SCANDIR "%%T"
 )
+DEL "!_DIRLIST!" 2>nul
+GOTO :SCANDONE
 
-:: Directory walk and upload
-ECHO Processing %COLLECT_DIRS% with filters MAX_SIZE: %COLLECT_MAX_SIZE% MAX_AGE: %MAX_AGE% days EXTENSIONS: %RELEVANT_EXTENSIONS%
-ECHO This could take a while depending on the disk size and number of files. (set DEBUG=1 to see all skips)
-FOR %%T IN (%COLLECT_DIRS%) DO (
-    SET TARGETDIR=%%T
-    IF NOT EXIST !TARGETDIR! (
-        ECHO Warning: Target directory !TARGETDIR! does not exist. Skipping ...
-    ) ELSE (
-        ECHO Checking !TARGETDIR! ...
-        :: Nested FOR does not accept delayed-expansion variables, so we need to use a workaround via pushd/popd
-        pushd !TARGETDIR!
-        FOR /R . %%F IN (*.*) DO (
-            SETLOCAL
-            :: Marker if processed due to selected extensions
-            SET PROCESSED=false
-            :: Extension Check
-            FOR %%E IN (%RELEVANT_EXTENSIONS%) DO (
-                :: Check if one of the relevant extensions matches the file extension
-                IF /I "%%~xF"=="%%E" (
-                    SET PROCESSED=true
-                    :: When the folder is empty [root directory] add extra characters
-                    IF "%%~pF"=="\" (
-                        SET FOLDER=%%~dF%%~pF\\
-                    ) ELSE (
-                        SET FOLDER=%%~dF%%~pF
-                    )
-                    :: File Size Check
-                    IF %%~zF GTR %COLLECT_MAX_SIZE% (
-                        :: File is too big
-                        IF %DEBUG% == 1 ECHO Skipping %%F due to big file size ...
-                    ) ELSE (
-                        :: Age check
-                        FORFILES /P "!FOLDER:~0,-1!" /M "%%~nF%%~xF" /D -%MAX_AGE% >nul 2>nul && (
-                            :: File is too old
-                            IF %DEBUG% == 1 ECHO Skipping %%F due to age ...
-                        ) || (
-                            :: Upload
-                            ECHO Uploading %%F ..
-                            :: We'll start the upload process in background to speed up the submission process
-                            START /B curl -F file=@%%F -H "Content-Type: multipart/form-data" -o nul -s %URL_SCHEME%://%THUNDERSTORM_SERVER%:%THUNDERSTORM_PORT%/api/checkAsync%SOURCE%
-                        )
-                    )
-                )
+:SCANDIR
+:: SCANDIR dir - append the quoted full path of every file below dir to _FILELIST.
+SETLOCAL DisableDelayedExpansion
+SET "_TDIR=%~1"
+SETLOCAL EnableDelayedExpansion
+IF "!_TDIR!"=="" (
+    ENDLOCAL & ENDLOCAL
+    GOTO :EOF
+)
+IF NOT EXIST "!_TDIR!" (
+    ECHO [ERROR] Warning: !_TDIR! does not exist, skipping. 1>&2
+    ENDLOCAL & ENDLOCAL
+    GOTO :EOF
+)
+IF !_DBG! == 1 ECHO [D] Scanning !_TDIR! ...
+:: A quoted /P value ending in '\' (e.g. "C:\") is read as an escaped quote and rejected; add '.'.
+IF "!_TDIR:~-1!"=="\" SET "_TDIR=!_TDIR!."
+:: /S recurses, @isdir==FALSE drops directories; MAX_AGE is applied later in PROCESSFILE.
+FORFILES /P "!_TDIR!" /S /C "cmd /c if @isdir==FALSE echo @path" >>"!_FILELIST!" 2>nul
+ENDLOCAL & ENDLOCAL
+GOTO :EOF
+
+:SCANDONE
+
+:: Count total files found
+SET /A _TOTAL=0
+IF EXIST "!_FILELIST!" (
+    FOR /F "usebackq" %%C IN (`type "!_FILELIST!" ^| find /c /v ""`) DO SET /A _TOTAL=%%C
+)
+ECHO [+] Found !_TOTAL! files.
+
+:: PHASE 2: FILTER AND UPLOAD ------------------------------------
+IF !_TOTAL! == 0 GOTO :DONE
+
+:: Delayed expansion is OFF in this loop so '!' in paths survives the FOR variable expansion.
+:: PROCESSFILE updates the counters inside this inner scope; carry them past the ENDLOCAL.
+SET "_FILELIST_SAVED=!_FILELIST!"
+SETLOCAL DisableDelayedExpansion
+FOR /F "usebackq delims=" %%F IN ("%_FILELIST_SAVED%") DO (
+    CALL :PROCESSFILE "%%~F"
+)
+ENDLOCAL & SET "_SCANNED=%_SCANNED%" & SET "_SUBMITTED=%_SUBMITTED%" & SET "_SKIPPED=%_SKIPPED%" & SET "_FAILED=%_FAILED%"
+GOTO :DONE
+
+:: ---------------------------------------------------------------
+:: Subroutine: PROCESSFILE
+:: Processes a single file path passed as %1.
+:: Uses SETLOCAL/ENDLOCAL to toggle delayed expansion, protecting
+:: file paths that contain '!' characters from being corrupted.
+:: ---------------------------------------------------------------
+:PROCESSFILE
+:: Capture the raw path while delayed expansion is OFF so any '!' in it is preserved
+SETLOCAL DisableDelayedExpansion
+SET "_FILE=%~1"
+:: Delayed expansion back ON: the counters and comparisons below rely on it
+SETLOCAL EnableDelayedExpansion
+
+:: Read size and extension through FOR variable modifiers; an empty _SZ is treated as file-not-found below
+SET _EXTMATCH=0
+SET _SZ=
+SET "_FEXT="
+FOR %%S IN ("!_FILE!") DO (
+    SET "_SZ=%%~zS"
+    SET "_FEXT=%%~xS"
+)
+FOR %%E IN (%_EXTS%) DO (
+    IF /I "!_FEXT!"=="%%E" SET _EXTMATCH=1
+)
+IF !_EXTMATCH! == 0 (
+    IF !_DBG! == 1 ECHO [D] Skip: !_FILE! ^(extension^)
+    SET /A _SKIPPED+=1
+    :: The FOR captures the counter values first so they survive the two ENDLOCALs below
+    FOR /F "tokens=1-4" %%A IN ("!_SCANNED! !_SUBMITTED! !_SKIPPED! !_FAILED!") DO (
+        ENDLOCAL & ENDLOCAL
+        SET /A _SCANNED=%%A
+        SET /A _SUBMITTED=%%B
+        SET /A _SKIPPED=%%C
+        SET /A _FAILED=%%D
+    )
+    GOTO :EOF
+)
+:: Size check (file may have been deleted since listing)
+IF "!_SZ!"=="" (
+    IF !_DBG! == 1 ECHO [D] Skip: !_FILE! ^(file not found^)
+    SET /A _SKIPPED+=1
+    FOR /F "tokens=1-4" %%A IN ("!_SCANNED! !_SUBMITTED! !_SKIPPED! !_FAILED!") DO (
+        ENDLOCAL & ENDLOCAL
+        SET /A _SCANNED=%%A
+        SET /A _SUBMITTED=%%B
+        SET /A _SKIPPED=%%C
+        SET /A _FAILED=%%D
+    )
+    GOTO :EOF
+)
+IF !_SZ! GTR !_MAXSZ! (
+    IF !_DBG! == 1 ECHO [D] Skip: !_FILE! ^(size: !_SZ!^)
+    SET /A _SKIPPED+=1
+    FOR /F "tokens=1-4" %%A IN ("!_SCANNED! !_SUBMITTED! !_SKIPPED! !_FAILED!") DO (
+        ENDLOCAL & ENDLOCAL
+        SET /A _SCANNED=%%A
+        SET /A _SUBMITTED=%%B
+        SET /A _SKIPPED=%%C
+        SET /A _FAILED=%%D
+    )
+    GOTO :EOF
+)
+:: Age check - only when MAX_AGE is greater than 0. ISFILEOLD reports through _ISOLD,
+:: and a value of 1 means the file is skipped as too old.
+IF !_MAXAGE! GTR 0 (
+    SET "_ISOLD=0"
+    CALL :ISFILEOLD "!_FILE!" !_MAXAGE!
+    IF "!_ISOLD!"=="1" (
+        IF !_DBG! == 1 ECHO [D] Skip: !_FILE! ^(age: older than !_MAXAGE! days^)
+        SET /A _SKIPPED+=1
+        FOR /F "tokens=1-4" %%A IN ("!_SCANNED! !_SUBMITTED! !_SKIPPED! !_FAILED!") DO (
+            ENDLOCAL & ENDLOCAL
+            SET /A _SCANNED=%%A
+            SET /A _SUBMITTED=%%B
+            SET /A _SKIPPED=%%C
+            SET /A _FAILED=%%D
+        )
+        GOTO :EOF
+    )
+)
+:: Upload - _SCANNED counts only files that passed every filter above
+SET /A _SCANNED+=1
+ECHO [+] Uploading: !_FILE!
+SET _HTTPCODE=
+"%_CURL%" -s -o nul -D "!_RESPTMP!.hdr" -w "%%{http_code}" -F "file=@!_FILE!;filename=!_FILE!" "%_SCHEME%://%_TS%:%_TP%/api/checkAsync?source=!_SRCURL!!_IDPARAM!" >"!_RESPTMP!" 2>nul
+SET _CURLRC=!ERRORLEVEL!
+IF !_CURLRC! == 0 (
+    SET /P _HTTPCODE=<"!_RESPTMP!"
+    DEL "!_RESPTMP!" 2>nul
+    IF "!_HTTPCODE!"=="" (
+        ECHO [ERROR] Failed: !_FILE! ^(empty response^) 1>&2
+        SET /A _FAILED+=1
+    ) ELSE IF "!_HTTPCODE!"=="503" (
+        :: Respect Retry-After header, capped at 60s, default 5s
+        SET _RETRYWAIT=5
+        IF EXIST "!_RESPTMP!.hdr" (
+            FOR /F "tokens=2 delims=: " %%H IN ('FINDSTR /I "^Retry-After:" "!_RESPTMP!.hdr"') DO (
+                SET /A _RETRYWAIT=%%H 2>nul
+                IF !_RETRYWAIT! LEQ 0 SET _RETRYWAIT=5
+                IF !_RETRYWAIT! GTR 60 SET _RETRYWAIT=60
             )
-            :: Note that file was skipped due to wrong extension
-            IF %DEBUG% == 1 (
-                IF !PROCESSED! == false ECHO Skipping %%F due to extension ...
+        )
+        DEL "!_RESPTMP!.hdr" 2>nul
+        ECHO [!] Server busy ^(503^), waiting !_RETRYWAIT!s before retry... 1>&2
+        SET /A _PINGCOUNT=!_RETRYWAIT!+1
+        PING -n !_PINGCOUNT! 127.0.0.1 >nul 2>&1
+        SET _HTTPCODE2=
+        "!_CURL!" -s -o nul -D "!_RESPTMP!.hdr" -w "%%{http_code}" -F "file=@!_FILE!;filename=!_FILE!" "!_SCHEME!://!_TS!:!_TP!/api/checkAsync?source=!_SRCURL!!_IDPARAM!" >"!_RESPTMP!" 2>nul
+        SET _CURLRC2=!ERRORLEVEL!
+        IF !_CURLRC2! == 0 (
+            SET /P _HTTPCODE2=<"!_RESPTMP!"
+            DEL "!_RESPTMP!" 2>nul
+            DEL "!_RESPTMP!.hdr" 2>nul
+            IF "!_HTTPCODE2!"=="503" (
+                ECHO [ERROR] Failed: !_FILE! ^(server still busy^) 1>&2
+                SET /A _FAILED+=1
+            ) ELSE IF "!_HTTPCODE2:~0,1!"=="2" (
+                SET /A _SUBMITTED+=1
+            ) ELSE (
+                ECHO [ERROR] Failed: !_FILE! ^(HTTP !_HTTPCODE2! on retry^) 1>&2
+                SET /A _FAILED+=1
             )
-            ENDLOCAL
+        ) ELSE (
+            DEL "!_RESPTMP!" 2>nul
+            DEL "!_RESPTMP!.hdr" 2>nul
+            ECHO [ERROR] Failed: !_FILE! ^(curl exit: !_CURLRC2!^) 1>&2
+            SET /A _FAILED+=1
         )
-        popd
+    ) ELSE IF "!_HTTPCODE:~0,1!"=="2" (
+        DEL "!_RESPTMP!.hdr" 2>nul
+        SET /A _SUBMITTED+=1
+    ) ELSE (
+        DEL "!_RESPTMP!.hdr" 2>nul
+        ECHO [ERROR] Failed: !_FILE! ^(HTTP !_HTTPCODE!^) 1>&2
+        SET /A _FAILED+=1
     )
+) ELSE (
+    DEL "!_RESPTMP!" 2>nul
+    DEL "!_RESPTMP!.hdr" 2>nul
+    ECHO [ERROR] Failed: !_FILE! ^(curl exit: !_CURLRC!^) 1>&2
+    SET /A _FAILED+=1
+)
+:: Clean up any leftover temp files from this iteration
+IF EXIST "!_RESPTMP!" DEL "!_RESPTMP!" 2>nul
+IF EXIST "!_RESPTMP!.hdr" DEL "!_RESPTMP!.hdr" 2>nul
+:: Hand the counters to the caller: values are captured by the FOR before both scopes are closed
+FOR /F "tokens=1-4" %%A IN ("!_SCANNED! !_SUBMITTED! !_SKIPPED! !_FAILED!") DO (
+    ENDLOCAL & ENDLOCAL
+    SET /A _SCANNED=%%A
+    SET /A _SUBMITTED=%%B
+    SET /A _SKIPPED=%%C
+    SET /A _FAILED=%%D
+)
+GOTO :EOF
+
+:: ---------------------------------------------------------------
+:: Subroutine: ISFILEOLD file days
+:: Sets _ISOLD=1 if file was last modified days or more days ago, else 0.
+:: ---------------------------------------------------------------
+:ISFILEOLD
+SETLOCAL DisableDelayedExpansion
+SET "_CHECK_FILE=%~1"
+SET "_CHECK_AGE=%~2"
+SET "_ISOLD=0"
+SET "_AGEDIR="
+SET "_AGENAME="
+FOR %%S IN ("%_CHECK_FILE%") DO (
+    SET "_AGEDIR=%%~dpS"
+    SET "_AGENAME=%%~nxS"
+)
+IF "%_AGEDIR%"=="" GOTO :ISFILEOLDRETURN
+IF "%_AGENAME%"=="" GOTO :ISFILEOLDRETURN
+:: _AGEDIR ends in '\'; a quoted /P ending in '\' is read as an escaped quote and fails, hence the '.'
+FORFILES /P "%_AGEDIR%." /M "%_AGENAME%" /D -%_CHECK_AGE% /C "cmd /c if @isdir==FALSE exit /b 0" >nul 2>nul
+IF NOT ERRORLEVEL 1 SET "_ISOLD=1"
+
+:ISFILEOLDRETURN
+ENDLOCAL & SET "_ISOLD=%_ISOLD%"
+GOTO :EOF
+
+:DONE
+
+:: CLEANUP -------------------------------------------------------
+IF EXIST "!_FILELIST!" DEL "!_FILELIST!" 2>nul
+IF EXIST "!_RESPTMP!" DEL "!_RESPTMP!" 2>nul
+IF EXIST "!_RESPTMP!.hdr" DEL "!_RESPTMP!.hdr" 2>nul
+IF EXIST "!_RESPTMP!.code" DEL "!_RESPTMP!.code" 2>nul
+
+:: SUMMARY -------------------------------------------------------
+ECHO.
+ECHO [+] Done. scanned=!_SCANNED! submitted=!_SUBMITTED! skipped=!_SKIPPED! failed=!_FAILED!
+
+:: EXIT CODE: 1 if any uploads failed, 0 otherwise
+IF !_FAILED! GTR 0 (
+    ENDLOCAL
+    EXIT /b 1
 )
+ENDLOCAL
+EXIT /b 0
diff --git a/scripts/thunderstorm-collector.pl b/scripts/thunderstorm-collector.pl
index 7bafa6d..402c1b4 100755
--- a/scripts/thunderstorm-collector.pl
+++ b/scripts/thunderstorm-collector.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl -s 
+#!/usr/bin/perl
 #
 # THOR Thunderstorm Collector
 # Florian Roth
@@ -7,12 +7,12 @@
 #
 # Requires LWP::UserAgent
 #   - on Linux: apt-get install libwww-perl
-#   - other: perl -MCPAN -e 'install Bundle::LWP' 
+#   - other: perl -MCPAN -e 'install Bundle::LWP'
 #
 # Usage examples:
-#   $> perl thunderstorm-collector.pl -- -s thunderstorm.internal.net
-#   $> perl thunderstorm-collector.pl -- --dir / --server thunderstorm.internal.net
-#   $> perl thunderstorm-collector.pl -- --dir / --server thunderstorm.internal.net --so "My Source"
+#   $> perl thunderstorm-collector.pl -s thunderstorm.internal.net
+#   $> perl thunderstorm-collector.pl --dir / --server thunderstorm.internal.net
+#   $> perl thunderstorm-collector.pl --dir / --server thunderstorm.internal.net --source "My Source"
 
 use warnings;
 use strict;
@@ -20,183 +20,635 @@
 use LWP::UserAgent;
 use File::Spec::Functions qw( catfile );
 use Sys::Hostname;
+use POSIX qw(strftime);
 
-use Cwd; # module for finding the current working directory 
+use Cwd; # module for finding the current working directory
 
 # Configuration
 our $debug = 0;
-my $targetdir = "/";
+my @targetdirs;
 my $server = "";
 my $port = 8080;
 my $scheme = "http";
 my $source = "";
-our $max_age = 3;       # in days
-our $max_size = 10;     # in megabytes
+my $ssl = 0;
+my $insecure = 0;
+my $ca_cert = "";
+my $sync_mode = 0;
+my $dry_run = 0;
+my $retries_opt = 3;
+my $progress_opt;       # undef = auto-detect, 1 = force on, 0 = force off
+our $max_age = 14;      # in days (harmonized with bash/ash)
+our $max_size_kb = 2048; # in KB (harmonized with bash/ash)
+our $interrupted = 0;
+# Note: size checks use $max_size_kb directly (in KB)
 our @skipElements = map { qr{$_} } ('^\/proc', '^\/mnt', '\.dat$', '\.npm');
-our @hardSkips = ('/proc', '/dev', '/sys');
+our @hardSkips = ('/proc', '/dev', '/sys', '/run', '/snap', '/.snapshots');
+
+# Network and special filesystem types (mount points with these types are excluded)
+our %networkFsTypes = map { $_ => 1 } qw(nfs nfs4 cifs smbfs smb3 sshfs fuse.sshfs afp webdav davfs2 fuse.rclone fuse.s3fs);
+our %specialFsTypes = map { $_ => 1 } qw(proc procfs sysfs devtmpfs devpts cgroup cgroup2 pstore bpf tracefs debugfs securityfs hugetlbfs mqueue autofs fusectl rpc_pipefs nsfs configfs binfmt_misc selinuxfs efivarfs ramfs);
+
+# Cloud storage folder names (lowercase)
+our %cloudDirNames = map { $_ => 1 } ('onedrive', 'dropbox', '.dropbox', 'googledrive', 'google drive',
+    'icloud drive', 'iclouddrive', 'nextcloud', 'owncloud', 'mega', 'megasync', 'tresorit', 'syncthing');
+
+sub get_excluded_mounts {
+    # Returns the mount points listed in /proc/mounts whose filesystem type is
+    # in %networkFsTypes or %specialFsTypes; empty list if the file is unreadable.
+    my @skip_mounts;
+    open(my $mounts, '<', '/proc/mounts') or return @skip_mounts;
+    while (defined(my $entry = <$mounts>)) {
+        # Fields are whitespace-separated; index 1 = mount point, index 2 = fs type
+        my @fields = split(/\s+/, $entry);
+        next if @fields < 3;
+        my ($where, $type) = @fields[1, 2];
+        next unless $networkFsTypes{$type} || $specialFsTypes{$type};
+        # Spaces, tabs etc. appear as \NNN octal escapes (\040 = space);
+        # decode them so the result is the real directory name
+        $where =~ s/\\([0-7]{3})/chr(oct($1))/ge;
+        push @skip_mounts, $where;
+    }
+    close($mounts);
+    return @skip_mounts;
+}
+
+sub is_cloud_path {
+    # Returns 1 when $candidate appears to be inside a cloud-sync folder, else 0.
+    # Matching is case-insensitive and treats '\' and '/' as the same separator.
+    my ($candidate) = @_;
+    (my $normalized = lc($candidate)) =~ tr{\\}{/};
+    foreach my $part (split(m{/}, $normalized)) {
+        # exact folder name from %cloudDirNames, or an "onedrive..."/"nextcloud-" prefix
+        return 1 if $cloudDirNames{$part};
+        return 1 if $part =~ /^(?:onedrive[\s-]|nextcloud-)/;
+    }
+    return ($normalized =~ m{/library/cloudstorage}) ? 1 : 0;
+}
 
 # Command Line Parameters
 GetOptions(
-    "dir|d=s"      => \$targetdir,  # --dir or -d
-    "server|s=s"   => \$server,     # --server or -s
-    "port|p=i"     => \$port,       # --port or -p
-    "source|so=s"  => \$source,     # --source or -so
-    "debug"        => \$debug       # --debug
+    "dir|d=s"        => \@targetdirs,   # --dir or -d (repeatable)
+    "server|s=s"     => \$server,       # --server or -s
+    "port|p=i"       => \$port,         # --port or -p
+    "source=s"       => \$source,       # --source (no short option to avoid conflict)
+    "ssl"            => \$ssl,          # --ssl (use HTTPS)
+    "insecure|k"     => \$insecure,     # --insecure or -k (skip TLS verify)
+    "ca-cert=s"      => \$ca_cert,      # --ca-cert PATH (custom CA bundle)
+    "sync"           => \$sync_mode,    # --sync (use /api/check)
+    "dry-run"        => \$dry_run,      # --dry-run
+    "retries=i"      => \$retries_opt,  # --retries N
+    "max-age=i"      => \$max_age,      # --max-age N (days)
+    "max-size-kb=i"  => \$max_size_kb,  # --max-size-kb N
+    "progress"       => sub { $progress_opt = 1; },   # --progress
+    "no-progress"    => sub { $progress_opt = 0; },   # --no-progress
+    "debug"          => \$debug         # --debug
 );
+$scheme = "https" if $ssl;
+
+# Default to "/" if no --dir specified
+@targetdirs = ("/") unless @targetdirs;
+
+# Validate numeric options
+if ($retries_opt < 0) {
+    print STDERR "[ERROR] --retries must be non-negative (got $retries_opt)\n";
+    exit 2;
+}
+if ($max_age < 0) {
+    print STDERR "[ERROR] --max-age must be non-negative (got $max_age)\n";
+    exit 2;
+}
+if ($max_size_kb < 0) {
+    print STDERR "[ERROR] --max-size-kb must be non-negative (got $max_size_kb)\n";
+    exit 2;
+}
+
+# Progress reporting: auto-detect TTY unless overridden
+our $show_progress;
+if (defined $progress_opt) {
+    $show_progress = $progress_opt;
+} else {
+    $show_progress = (-t STDERR) ? 1 : 0;
+}
 
 # Use Hostname as Source if not set
 if ( $source eq "" ) {
     $source = hostname;
 }
+# Preserve raw source for use in collection markers
+our $source_raw = $source;
+
+# URL-encode source parameter
+sub urlencode {
+    my ($raw) = @_;    # percent-encode every character except A-Z a-z 0-9 - _ . ~
+    (my $encoded = $raw) =~ s/([^A-Za-z0-9\-_.~])/sprintf("%%%02X", ord($1))/ge;
+    return $encoded;
+}
+
+# Track whether URL has query parameters
+our $url_has_query = 0;
+
 # Add Source to URL if available
+my $source_query = "";
 if ( $source ne "" ) {
-    print "[DEBUG] No source specified, using hostname: $source\n" if $debug;
-    $source = "?source=$source";
+    print "[DEBUG] Using source identifier: $source\n" if $debug;
+    $source_query = "?source=" . urlencode($source);
+    $url_has_query = 1;
 }
 
 # Composed Values
-our $api_endpoint = "$scheme://$server:$port/api/checkAsync$source";
+our $base_url = "$scheme://$server:$port";
+my $api_path = $sync_mode ? "/api/check" : "/api/checkAsync";
+our $api_endpoint = "$base_url$api_path$source_query";
 our $current_date = time;
+our $SCAN_ID = "";
 
 # Stats
 our $num_submitted = 0;
 our $num_processed = 0;
+our $num_failed = 0;
+our $collection_started = 0;
 
 # Objects
 our $ua;
 
-# Process Folders
-sub processDir { 
-    my ($workdir) = shift; 
-    my ($startdir) = &cwd; 
-    # keep track of where we began 
-    chdir($workdir) or do { print "[ERROR] Unable to enter dir $workdir:$!\n"; return; }; 
-    opendir(DIR, ".") or do { print "[ERROR] Unable to open $workdir:$!\n"; return; }; 
-    
-    my @names = readdir(DIR) or do { print "[ERROR] Unable to read $workdir:$!\n"; return; };
-    closedir(DIR); 
-    
-    foreach my $name (@names){ 
-        next if ($name eq "."); 
-        next if ($name eq ".."); 
-
-        #print("Workdir: $workdir Name: $name\n");
-        my $filepath = catfile($workdir, $name);
-        # Hard directory skips
-        my $skipHard = 0;
-        foreach ( @hardSkips ) { 
-            $skipHard = 1 if ( $filepath eq $_ ); 
+# Escape a string so it can be embedded between double quotes in JSON.
+# Backslash, double quote and the control characters U+0000..U+001F are
+# escaped; everything else is passed through. Returns the escaped copy.
+sub json_escape {
+    my ($text) = @_;
+    # Characters that have a dedicated short escape sequence
+    my %short = (
+        "\\"   => "\\\\",
+        "\""   => "\\\"",
+        "\n"   => "\\n",
+        "\r"   => "\\r",
+        "\t"   => "\\t",
+        "\x08" => "\\b",
+        "\x0c" => "\\f",
+    );
+    # Single pass: short escape when available, \u00XX for the remaining
+    # control characters
+    $text =~ s/([\\"\x00-\x1f])/exists $short{$1} ? $short{$1} : sprintf("\\u%04x", ord($1))/ge;
+    return $text;
+}
+
+# Send a collection marker (the script uses "begin", "end" and "interrupted")
+# as a JSON POST to $base_url/api/collection.
+#
+# Parameters:
+#   $marker_type - value of the JSON "type" field
+#   $scan_id     - scan id to include in the body; left out when undefined or ""
+#   $stats_ref   - optional hash ref, serialized as the JSON "stats" object
+#
+# Returns ($scan_id, $http_success) where:
+#   $scan_id = scan_id from response or ""
+#   $http_success = 1 if HTTP request succeeded, 0 if transport/HTTP failure
+# A 404 or 501 response is reported as ("", 1): the endpoint is treated as
+# unsupported and the caller continues without a scan_id.
+sub collection_marker {
+    my ($marker_type, $scan_id, $stats_ref) = @_;
+    my $marker_url = "$base_url/api/collection";
+    $marker_url .= "?source=" . urlencode($source_raw) if $source_raw ne "";
+
+    # UTC timestamp in ISO 8601 form, e.g. 2024-01-31T12:00:00Z
+    my $timestamp = POSIX::strftime("%Y-%m-%dT%H:%M:%SZ", gmtime());
+    my $timestamp_esc = json_escape($timestamp);
+    # Use the preserved raw source value (user-provided or hostname)
+    my $src_escaped = json_escape($source_raw);
+
+    # The JSON body is assembled by hand; every interpolated value is passed
+    # through json_escape() first
+    my $type_esc = json_escape($marker_type);
+    my $body = "{\"type\":\"$type_esc\",\"source\":\"$src_escaped\",\"collector\":\"perl/0.2\",\"timestamp\":\"$timestamp_esc\"";
+    $body .= ",\"scan_id\":\"" . json_escape($scan_id) . "\"" if (defined $scan_id && $scan_id ne '');
+    if ($stats_ref) {
+        $body .= ",\"stats\":{";
+        my @pairs;
+        # Key order follows Perl's hash iteration order
+        for my $k (keys %$stats_ref) {
+            my $ek = json_escape($k);
+            my $v = $stats_ref->{$k};
+            # Values that look like plain decimal numbers are emitted unquoted;
+            # anything else (including undef, sent as "") becomes a JSON string
+            if (defined $v && $v =~ /^-?\d+(?:\.\d+)?$/) {
+                push @pairs, qq{"$ek":$v};
+            } else {
+                my $ev = json_escape(defined $v ? $v : "");
+                push @pairs, qq{"$ek":"$ev"};
+            }
         }
-        next if $skipHard;
-        
-        # Is a Directory
-        if (-d $filepath){ 
-            #print "IS DIR!\n";
-            # Skip symbolic links
-            if (-l $filepath) { next; }
-            # Process Dir
-            &processDir($filepath); 
-            next; 
+        $body .= join(",", @pairs) . "}";
+    }
+    $body .= "}";
+
+    # eval turns an exception thrown by the request into an undefined $resp
+    my $resp = eval {
+        $ua->post($marker_url,
+            "Content-Type" => "application/json",
+            Content => $body,
+        );
+    };
+    return ("", 0) unless $resp;
+    # 404/501 = endpoint not supported, continue without scan_id but success
+    if ($resp->code == 404 || $resp->code == 501) {
+        print STDERR "[WARN] Collection marker '$marker_type' not supported (HTTP " . $resp->code . ") — server does not implement /api/collection\n";
+        return ("", 1);
+    }
+    return ("", 0) unless $resp->is_success;
+
+    my $resp_body = $resp->content;
+    my $returned_id = "";
+    # Parse scan_id from JSON, handling escaped characters
+    # (regex extraction of the first "scan_id" string value, not a full JSON parse)
+    if ($resp_body =~ /"scan_id"\s*:\s*"((?:[^"\\]|\\.)*)"/) {
+        my $raw_id = $1;
+        # Unescape JSON string escapes
+        # NOTE(review): escaped backslashes are unescaped first, so the sequence
+        # backslash-backslash-n would end up decoded as a newline; the whitelist
+        # check below rejects such values, so this cannot produce an accepted id
+        $raw_id =~ s/\\(["\\\/])/$1/g;
+        $raw_id =~ s/\\n/\n/g;
+        $raw_id =~ s/\\r/\r/g;
+        $raw_id =~ s/\\t/\t/g;
+        $raw_id =~ s/\\u([0-9a-fA-F]{4})/chr(hex($1))/ge;
+        # Validate: scan_id should be alphanumeric/dash/underscore/dot (reject suspicious values)
+        if ($raw_id =~ /^[A-Za-z0-9\-_.]+$/) {
+            $returned_id = $raw_id;
         } else {
-            if ( $debug ) { print "[DEBUG] Checking $filepath ...\n"; }
+            print STDERR "[WARN] Received scan_id with unexpected characters, ignoring\n";
         }
+    }
+    return ($returned_id, 1);
+}
+
+# Count eligible files in a directory tree (for progress reporting)
+our $total_eligible = 0;
+
+# Return 1 when $path equals an entry of @hardSkips or lies below one
+# (the entry followed by '/' is a prefix of $path); otherwise return 0.
+sub is_hard_skip {
+    my ($path) = @_;
+    for my $skip (@hardSkips) {
+        return 1 if $path eq $skip;
+        # Prefix match only counts at a path-component boundary
+        next unless index($path, $skip) == 0;
+        return 1 if substr($path, length($skip), 1) eq '/';
+    }
+    return 0;
+}
+
+# Count the files below $start that pass the skip, size and age filters and
+# add the result to $total_eligible (used as the progress denominator).
+# The tree is walked iteratively with an explicit stack; directories that
+# cannot be opened are skipped silently. The walk stops early once
+# $interrupted is set.
+sub countDir {
+    my ($start) = @_;
+    my @stack = ($start);
+
+    while (@stack) {
+        last if $interrupted;
+        my $workdir = pop @stack;
+
+        opendir(my $dh, $workdir) or next;
+        my @names = readdir($dh);
+        closedir($dh);
+
+        foreach my $name (@names) {
+            next if ($name eq "." || $name eq "..");
+            last if $interrupted;
+
+            my $filepath = catfile($workdir, $name);
+            # Hard directory skips
+            next if is_hard_skip($filepath);
+            next if is_cloud_path($filepath);
 
-        # Characteristics
-        my $size = (stat($filepath))[7];
-        my $mdate = (stat($filepath))[9];
-        #print("SIZE: $size MDATE: $mdate\n");
-
-        # Count
-        $num_processed++;
-
-        # Skip some files ----------------------------------------
-        # Skip Folders / elements
-        my $skipRegex = 0;
-        # Regex Checks
-        foreach ( @skipElements ) { 
-            if ( $filepath =~ $_ ) {
-                if ( $debug ) { print "[DEBUG] Skipping file due to configured exclusion $filepath\n"; }
-                $skipRegex = 1;
-            } 
+            # Use lstat consistently to avoid following symlinks (mirrors processDir)
+            my @st = lstat($filepath);
+            next unless @st;
+            # Skip symlinks
+            # (the file tests on "_" reuse the lstat result from above)
+            next if -l _;
+
+            if (-d _) {
+                push @stack, $filepath;
+                next;
+            }
+
+            # Only process regular files
+            next unless -f _;
+
+            # lstat fields: [7] = size in bytes, [9] = last modification time
+            my $size = $st[7];
+            my $mdate = $st[9];
+
+            # Apply same skip logic as processDir
+            my $skipRegex = 0;
+            foreach (@skipElements) {
+                if ($filepath =~ $_) { $skipRegex = 1; last; }
+            }
+            next if $skipRegex;
+            # Too large: size in KB exceeds $max_size_kb
+            next if (defined $size && ($size / 1024) > $max_size_kb);
+            # Too old: modified more than $max_age days before the start of the run
+            next if (defined $mdate && $mdate < ($current_date - ($max_age * 86400)));
+
+            $total_eligible++;
         }
-        next if $skipRegex;
-        # Size
-        if ( ( $size / 1024 / 1024 ) gt $max_size ) {
-            if ( $debug ) { print "[DEBUG] Skipping file due to file size $filepath\n"; }
-            next;
+    }
+}
+
+# Process Folders (iterative to avoid stack overflow on deep trees)
+#
+# Walks the tree below $start with an explicit stack and hands every regular
+# file that passes the skip, size and age filters to submitSample().
+# Symlinks are never followed, hard-skip and cloud paths are ignored, and
+# directories that cannot be opened are reported on STDERR and skipped.
+# $num_processed is incremented once per eligible file; the walk stops early
+# once $interrupted is set.
+sub processDir {
+    my ($start) = @_;
+    my @stack = ($start);
+
+    while (@stack) {
+        last if $interrupted;
+        my $workdir = pop @stack;
+
+        opendir(my $dh, $workdir) or do { print STDERR "[ERROR] Unable to open $workdir:$!\n"; next; };
+
+        my @names = readdir($dh);
+        closedir($dh);
+
+        next if !@names;
+
+        foreach my $name (@names){
+            next if ($name eq ".");
+            next if ($name eq "..");
+
+            # Check for interruption
+            last if $interrupted;
+
+            my $filepath = catfile($workdir, $name);
+            # Hard directory skips (prefix match)
+            next if is_hard_skip($filepath);
+
+            # Skip cloud storage paths
+            next if is_cloud_path($filepath);
+
+            # Use lstat to avoid following symlinks; use _ for cached results
+            my @st = lstat($filepath);
+            next unless @st;  # skip if stat fails
+
+            # Check symlinks using cached lstat result
+            next if -l _;
+
+            # Is a Directory
+            if (-d _){
+                push @stack, $filepath;
+                next;
+            }
+
+            # Only process regular files
+            next unless -f _;
+
+            # Is a file
+            if ( $debug ) { print "[DEBUG] Checking $filepath ...\n"; }
+
+            # lstat fields: [7] = size in bytes, [9] = last modification time
+            my $size = $st[7];
+            my $mdate = $st[9];
+
+            # Skip some files ----------------------------------------
+            # Skip Folders / elements
+            my $skipRegex = 0;
+            # Regex Checks
+            foreach ( @skipElements ) {
+                if ( $filepath =~ $_ ) {
+                    if ( $debug ) { print "[DEBUG] Skipping file due to configured exclusion $filepath\n"; }
+                    $skipRegex = 1;
+                    # First match decides (same as countDir); avoids repeated
+                    # debug lines when several patterns match the same path
+                    last;
+                }
+            }
+            next if $skipRegex;
+            # Size
+            if ( defined $size && ( $size / 1024 ) > $max_size_kb ) {
+                if ( $debug ) { print "[DEBUG] Skipping file due to file size $filepath\n"; }
+                next;
+            }
+            # Age
+            if ( defined $mdate && $mdate < ( $current_date - ($max_age * 86400) ) ) {
+                if ( $debug ) { print "[DEBUG] Skipping file due to age $filepath\n"; }
+                next;
+            }
+
+            # Count (after all skip checks, so only eligible files are counted)
+            $num_processed++;
+
+            # Progress reporting with [N/total] X% format
+            if ($show_progress) {
+                if ($total_eligible > 0) {
+                    my $pct = int(($num_processed / $total_eligible) * 100);
+                    # Files created after the pre-scan can push the ratio above 1
+                    $pct = 100 if $pct > 100;
+                    print STDERR "\r[$num_processed/$total_eligible] $pct%   ";
+                } else {
+                    print STDERR "\r[PROGRESS] Processed: $num_processed Submitted: $num_submitted   ";
+                }
+            }
+
+            # Submit
+            &submitSample($filepath);
         }
-        # Age
-        #print("MDATE: $mdate CURR_DATE: $current_date\n");
-        if ( $mdate lt ( $current_date - ($max_age * 86400) ) ) {
-            if ( $debug ) { print "[DEBUG] Skipping file due to age $filepath\n"; }
-            next;
-        }       
-        
-        # Submit
-        &submitSample($filepath);
-
-        chdir($startdir) or die "Unable to change back to dir $startdir:$!\n"; 
-    } 
-} 
+    }
+}
 
 sub submitSample {
     my ($filepath) = shift;
-    print "[SUBMIT] Submitting $filepath ...\n";
+    if ($dry_run) {
+        print "[DRY-RUN] Would submit $filepath ...\n";
+        $num_submitted++;
+        return;
+    }
+    print STDERR "[SUBMIT] Submitting $filepath ...\n";
     my $retry = 0;
-    for ($retry = 0; $retry < 4; $retry++) {
-        if ($retry > 0) {
-            my $sleep_time = 2 << $retry;
-            print "[SUBMIT] Waiting $sleep_time seconds to retry submitting $filepath ...\n";
-            sleep($sleep_time)
+    my $successful = 0;
+    my $next_sleep = 0;  # sleep time before next attempt (0 = no sleep for first attempt)
+    for ($retry = 0; $retry <= $retries_opt; $retry++) {
+        if ($next_sleep > 0) {
+            print STDERR "[SUBMIT] Waiting $next_sleep seconds to retry submitting $filepath ...\n";
+            sleep($next_sleep);
         }
-        my $successful = 0;
+        $successful = 0;
+        $next_sleep = 0;
         eval {
-            my $req = $ua->post($api_endpoint,
+            # Sanitize filename metadata: encode to UTF-8 with replacement, strip control chars
+        my $safe_path = $filepath;
+        if ($] >= 5.008) {
+            require Encode;
+            # Decode byte string as UTF-8, replacing invalid sequences
+            # FB_DEFAULT (0x0001) was introduced in Encode 2.53 (Perl 5.14);
+            # use the numeric value directly for Perl 5.8-5.12 compatibility
+            $safe_path = Encode::decode('UTF-8', $safe_path, 0x0001);
+            $safe_path = Encode::encode('UTF-8', $safe_path);
+        }
+        # Remove control characters except tab
+        $safe_path =~ s/[\x00-\x08\x0b\x0c\x0e-\x1f]//g;
+        my $req = $ua->post($api_endpoint,
                 Content_Type => 'form-data',
                 Content => [
-                    "file" => [ $filepath ],
+                    # Preserve full client path in multipart filename for filename IOC matching
+                    "file" => [ $filepath, $safe_path ],
                 ],
             );
             $successful = $req->is_success;
-            $num_submitted++;
-            print "\nError: ", $req->status_line unless $successful;
+            if (!$successful) {
+                if ($req->code == 503) {
+                    my $retry_after = 30;
+                    my $ra = $req->header('Retry-After');
+                    if (defined $ra && $ra =~ /^\d+$/) {
+                        $retry_after = int($ra);
+                        $retry_after = 300 if $retry_after > 300;  # cap at 5 minutes
+                    }
+                    $next_sleep = $retry_after;
+                    print STDERR "[SUBMIT] Server busy (503), retrying in ${retry_after}s ...\n";
+                } else {
+                    # Exponential backoff for non-503 errors: 2, 4, 8, 16, ...
+                    my $backoff = 2 ** ($retry + 1);
+                    $backoff = 300 if $backoff > 300;
+                    $next_sleep = $backoff;
+                    print STDERR "[ERROR] Upload failed for '$filepath': ", $req->status_line, "\n";
+                }
+            }
+            1;  # Return truthy so the 'or do { }' block doesn't execute on success
         } or do {
             my $error = $@ || 'Unknown failure';
-            warn "Could not submit '$filepath' - $error";
+            print STDERR "[ERROR] Could not submit '$filepath' - $error\n";
+            # Exponential backoff on exception
+            my $backoff = 2 ** ($retry + 1);
+            $backoff = 300 if $backoff > 300;
+            $next_sleep = $backoff;
         };
         if ($successful) {
+            $num_submitted++;
             last;
         }
     }
+    my $total_attempts = $retries_opt + 1;
+    if (!$successful) {
+        $num_failed++;
+        print STDERR "[ERROR] Failed to submit '$filepath' after $total_attempts attempts\n";
+    }
 }
 
 # MAIN ----------------------------------------------------------------
-# Default Values 
-print "==============================================================\n";
-print "    ________                __            __                  \n";
-print "   /_  __/ /  __ _____  ___/ /__ _______ / /____  ______ _    \n";
-print "    / / / _ \\/ // / _ \\/ _  / -_) __(_--/ __/ _ \\/ __/  ' \\   \n";
-print "   /_/ /_//_/\\_,_/_//_/\\_,_/\\__/_/ /___/\\__/\\___/_/ /_/_/_/   \n";
-print "                                                              \n";
-print "   Florian Roth, Nextron Systems GmbH, 2021                   \n";
-print "                                                              \n";
-print "==============================================================\n";
-print "Target Directory: '$targetdir'\n";
-print "Thunderstorm Server: '$server'\n";
-print "Thunderstorm Port: '$port'\n";
-print "Using API Endpoint: $api_endpoint\n";
-print "Maximum Age of Files: $max_age\n";
-print "Maximum File Size: $max_size\n";
-print "\n";
-
-# Instanciate an object 
+# Banner (printed to STDERR)
+my $banner_rule  = "==============================================================\n";
+my $banner_blank = "                                                              \n";
+print STDERR
+    $banner_rule,
+    "    ________                __            __                  \n",
+    "   /_  __/ /  __ _____  ___/ /__ _______ / /____  ______ _    \n",
+    "    / / / _ \\/ // / _ \\/ _  / -_) __(_--/ __/ _ \\/ __/  ' \\   \n",
+    "   /_/ /_//_/\\_,_/_//_/\\_,_/\\__/_/ /___/\\__/\\___/_/ /_/_/_/   \n",
+    $banner_blank,
+    "   Florian Roth, Nextron Systems GmbH, 2021                   \n",
+    $banner_blank,
+    $banner_rule;
+
+# A server is mandatory
+unless ($server ne "") {
+    print STDERR "[ERROR] No Thunderstorm server specified. Use --server or -s.\n";
+    exit 2;
+}
+
+# Validate server as hostname, IPv4, or bracketed IPv6 — reject URI delimiters
+my $host_label   = qr/[A-Za-z0-9](?:[A-Za-z0-9\-]*[A-Za-z0-9])?/;
+my $valid_server = qr/^(?:\[[0-9a-fA-F:]+\]|$host_label(?:\.$host_label)*)$/;
+unless ($server =~ $valid_server) {
+    print STDERR "[ERROR] Invalid server value '$server'. Must be a hostname, IPv4 address, or bracketed IPv6 address.\n";
+    exit 2;
+}
+
+# Effective configuration summary
+my $quoted_targets = join(", ", map { "'$_'" } @targetdirs);
+print STDERR
+    "Target Directories: $quoted_targets\n",
+    "Thunderstorm Server: '$server'\n",
+    "Thunderstorm Port: '$port'\n",
+    "Using API Endpoint: $api_endpoint\n",
+    "Maximum Age of Files: $max_age days\n",
+    "Maximum File Size: $max_size_kb KB\n",
+    "\n";
+
+# Extend hardSkips with mount points of network/special filesystems.
+# Mount points already present in @hardSkips, and duplicates within the
+# returned list, are added only once.
+{
+    my %known;
+    @known{@hardSkips} = (1) x @hardSkips;
+    push @hardSkips, grep { !$known{$_}++ } get_excluded_mounts();
+}
+
+# A CA certificate or --insecure only makes sense over TLS, so either option
+# implies SSL; the URLs composed earlier are rebuilt with the https scheme.
+if (!$ssl) {
+    if ($ca_cert ne "" || $insecure) {
+        print STDERR "[WARN] TLS option specified without --ssl, auto-enabling SSL\n";
+        $scheme = "https";
+        $ssl = 1;
+        $base_url = "$scheme://$server:$port";
+        $api_endpoint = "$base_url$api_path$source_query";
+    }
+}
+
+# Instantiate an object
 $ua = LWP::UserAgent->new;
+# TLS options: --insecure takes precedence over a custom CA certificate
+if ($ssl && $insecure) {
+    $ua->ssl_opts(verify_hostname => 0, SSL_verify_mode => 0x00);
+} elsif ($ssl && $ca_cert ne "") {
+    unless (-f $ca_cert) {
+        print STDERR "[ERROR] CA certificate file not found: $ca_cert\n";
+        exit 2;
+    }
+    $ua->ssl_opts(SSL_ca_file => $ca_cert);
+}
+
+# Signal handling for SIGINT/SIGTERM: the handler only sets $interrupted and
+# prints a warning to STDERR. No network I/O happens here; the directory
+# walkers check $interrupted and stop, and the main flow then sends the
+# "interrupted" collection marker and exits.
+$SIG{INT} = $SIG{TERM} = sub {
+    my $sig = shift;
+    $interrupted = 1;
+    print STDERR "\n[WARN] Caught SIG$sig, will send interrupted collection marker and exit ...\n";
+};
+
+# Pre-scan: count eligible files so progress can be shown as [N/total].
+# Skipped entirely when progress reporting is off; stops after the first
+# directory during which an interrupt arrived.
+if ($show_progress) {
+    print STDERR "[INFO] Counting eligible files for progress reporting ...\n";
+    foreach my $root (@targetdirs) {
+        countDir($root);
+        last if $interrupted;
+    }
+    unless ($interrupted) {
+        print STDERR "[INFO] Found $total_eligible eligible files\n";
+    }
+}
+
+my $walk_roots = join(", ", @targetdirs);
+print STDERR "Starting the walk at: $walk_roots ...\n";
+
+# Send collection begin marker (with single retry after 2s on failure).
+# If both attempts fail the run is aborted with exit code 2. On success the
+# returned scan_id (if any) is appended to the upload URL so that every
+# submitted sample carries it.
+my ($begin_id, $begin_ok) = collection_marker("begin", "", undef);
+if (!$begin_ok) {
+    print STDERR "[WARN] Initial connection to collection API failed, retrying in 2s ...\n";
+    sleep(2);
+    ($begin_id, $begin_ok) = collection_marker("begin", "", undef);
+}
+if (!$begin_ok) {
+    print STDERR "[ERROR] Cannot connect to Thunderstorm server at $base_url/api/collection after retry. Aborting.\n";
+    exit 2;
+}
+$collection_started = 1;
+$SCAN_ID = $begin_id;
+# Test for the empty string rather than truthiness: "0" is a scan_id that
+# passes collection_marker's validation and must not be dropped
+if ($SCAN_ID ne "") {
+    print STDERR "[INFO] Collection scan_id: $SCAN_ID\n";
+    # Determine separator based on whether URL already has query params
+    my $sep = $url_has_query ? "&" : "?";
+    $api_endpoint .= "${sep}scan_id=" . urlencode($SCAN_ID);
+    $url_has_query = 1;
+}
 
-print "Starting the walk at: $targetdir ...\n";
 # Start the walk
-&processDir($targetdir);
+foreach my $root (@targetdirs) {
+    last if $interrupted;
+    processDir($root);
+}
+
+# Interrupted run: report the partial statistics with an "interrupted"
+# collection marker (sent from normal execution context, not from the signal
+# handler), print the summary and exit with code 1.
+if ($interrupted) {
+    if ($collection_started) {
+        my $int_elapsed = time - $current_date;
+        my %int_stats = (
+            scanned         => $num_processed,
+            submitted       => $num_submitted,
+            failed          => $num_failed,
+            elapsed_seconds => $int_elapsed,
+        );
+        my ($int_id, $int_ok) = eval {
+            collection_marker("interrupted", $SCAN_ID, \%int_stats);
+        };
+        unless ($int_ok) {
+            print STDERR "[ERROR] Failed to send interrupted collection marker\n";
+        }
+    }
+    # Clear progress line if we were showing progress
+    print STDERR "\r" . (" " x 60) . "\r" if $show_progress;
+    my $int_minutes = int((time - $current_date) / 60);
+    print STDERR "Thunderstorm Collector Run interrupted (Checked: $num_processed Submitted: $num_submitted Failed: $num_failed Minutes: $int_minutes)\n";
+    exit 1;
+}
 
-# End message
+# Normal completion: send the "end" collection marker with the run statistics
 my $end_date = time;
-my $minutes = int(( $end_date - $current_date ) / 60);
-print "Thunderstorm Collector Run finished (Checked: $num_processed Submitted: $num_submitted Minutes: $minutes)\n";
+my $elapsed = $end_date - $current_date;
+my %end_stats = (
+    scanned         => $num_processed,
+    submitted       => $num_submitted,
+    failed          => $num_failed,
+    elapsed_seconds => $elapsed,
+);
+my ($end_id, $end_ok) = collection_marker("end", $SCAN_ID, \%end_stats);
+my $marker_failed = $end_ok ? 0 : 1;
+print STDERR "[ERROR] Failed to send end collection marker\n" if $marker_failed;
+
+# Clear progress line if we were showing progress
+print STDERR "\r" . (" " x 60) . "\r" if $show_progress;
+
+my $minutes = int( $elapsed / 60 );
+print STDERR "Thunderstorm Collector Run finished (Checked: $num_processed Submitted: $num_submitted Failed: $num_failed Minutes: $minutes)\n";
+
+# Exit codes: 0 = success, 1 = partial failure, 2 = fatal error
+exit( ($num_failed > 0 || $marker_failed) ? 1 : 0 );
diff --git a/scripts/thunderstorm-collector.ps1 b/scripts/thunderstorm-collector.ps1
index e73eab2..d462456 100644
--- a/scripts/thunderstorm-collector.ps1
+++ b/scripts/thunderstorm-collector.ps1
@@ -4,7 +4,7 @@
 # Author: Florian Roth 
 # Version: 0.2.0
 # Date Created: 07.10.2020  
-# Last Modified: 10.03.2026
+# Last Modified: 22.09.2025
 ################################################## 
 
 #Requires -Version 3
@@ -24,9 +24,9 @@
     .PARAMETER Folder 
         Folder to process (default: C:\)
     .PARAMETER MaxAge 
-        Select files based on the number of days in which the file has been create or modified (default: 0 = no age selection)
+        Select files based on the number of days in which the file has been created or modified (default: 14 days)
     .PARAMETER MaxSize
-        Extensions to select for submission (default: all of them)    
+        Maximum file size in MegaBytes for submission (default: 2MB / 2048KB)
     .PARAMETER Extensions
         Extensions to select for submission (default: all of them)
     .PARAMETER Debugging 
@@ -71,28 +71,50 @@ param
         [string]$Folder = "C:\",
 
     [Parameter(
-        HelpMessage='Select files based on the number of days in which the file has been create or modified (default: 0 = no age selection)')]
+        HelpMessage='Select files based on the number of days in which the file has been create or modified (default: 14 days)')]
         [ValidateNotNullOrEmpty()]
         [Alias('MA')]
-        [int]$MaxAge,
+        [int]$MaxAge = 14,
 
     [Parameter(
-        HelpMessage='Select only files smaller than the given number in MegaBytes (default: 20MB) ')]
+        HelpMessage='Select only files smaller than the given number in MegaBytes (default: 2MB / 2048KB) ')]
         [ValidateNotNullOrEmpty()]
         [Alias('MS')]
-        [int]$MaxSize,
+        [int]$MaxSize = 2,
 
-    [Parameter(HelpMessage='Extensions to select for submission (default: all of them)')]
+    [Parameter(HelpMessage='Extensions to select for submission (default: recommended preset)')]
         [ValidateNotNullOrEmpty()]
         [Alias('E')]
         [string[]]$Extensions,
 
+    [Parameter(HelpMessage='Submit all file extensions (overrides -Extensions)')]
+        [switch]$AllExtensions = $False,
+
+    [Parameter(HelpMessage='Use HTTPS instead of HTTP for Thunderstorm communication')]
+        [Alias('SSL')]
+        [switch]$UseSSL = $False,
+
+    [Parameter(HelpMessage='Path to custom CA certificate bundle for TLS verification')]
+        [string]$CACert = "",
+
+    [Parameter(HelpMessage='Skip TLS certificate verification (insecure)')]
+        [Alias('k')]
+        [switch]$Insecure = $False,
+
     [Parameter(HelpMessage='Enables debug output and skips cleanup at the end of the scan')]
+
         [ValidateNotNullOrEmpty()]
         [Alias('D')]
-        [switch]$Debugging = $False
+        [switch]$Debugging = $False,
+
+    [Parameter(HelpMessage='Force enable progress reporting')]
+        [switch]$Progress = $False,
+
+    [Parameter(HelpMessage='Force disable progress reporting')]
+        [switch]$NoProgress = $False
 )
 
+
 # Fixing Certain Platform Environments --------------------------------
 $AutoDetectPlatform = ""
 $OutputPath = $PSScriptRoot
@@ -124,25 +146,46 @@ if ( $OutputPath -eq "" -or $OutputPath.Contains("Advanced Threat Protection") )
 #[int]$MaxAge = 99
 
 # Maximum Size
-[int]$MaxSize = 20
+# Apply default only when no -MaxSize parameter was explicitly passed
+if (-not $PSBoundParameters.ContainsKey('MaxSize')) {
+    [int]$MaxSize = 2
+}
+# Enforce hard upper bound on MaxSize to prevent out-of-memory conditions
+if ($MaxSize -gt 200) {
+    Write-Host "[!] MaxSize capped to 200 MB to prevent excessive memory usage"
+    $MaxSize = 200
+}
 
 # Extensions
-# Recommended Preset
-[string[]]$Extensions = @('.asp','.vbs','.ps','.ps1','.rar','.tmp','.bas','.bat','.chm','.cmd','.com','.cpl','.crt','.dll','.exe','.hta','.js','.lnk','.msc','.ocx','.pcd','.pif','.pot','.reg','.scr','.sct','.sys','.url','.vb','.vbe','.vbs','.wsc','.wsf','.wsh','.ct','.t','.input','.war','.jsp','.php','.asp','.aspx','.doc','.docx','.pdf','.xls','.xlsx','.ppt','.pptx','.tmp','.log','.dump','.pwd','.w','.txt','.conf','.cfg','.conf','.config','.psd1','.psm1','.ps1xml','.clixml','.psc1','.pssc','.pl','.www','.rdp','.jar','.docm','.ace','.job','.temp','.plg','.asm')
-# Collect Every Extension
-#[string[]]$Extensions = @()
+# -AllExtensions overrides any -Extensions value
+# Note: PS 2.0 permanently binds parameter validation to $Extensions,
+# so we use a separate $ActiveExtensions variable for the working copy.
+if ($AllExtensions) {
+    [string[]]$ActiveExtensions = @()
+} elseif ($PSBoundParameters.ContainsKey('Extensions')) {
+    # Normalize user-supplied extensions: lowercase and ensure leading dot
+    [string[]]$ActiveExtensions = $Extensions | ForEach-Object {
+        $ext = $_.ToLowerInvariant().Trim()
+        if ($ext -ne '' -and -not $ext.StartsWith('.')) { $ext = '.' + $ext }
+        $ext
+    }
+} else {
+    # Apply recommended preset only when no -Extensions parameter was explicitly passed
+    [string[]]$ActiveExtensions = @('.asp','.vbs','.ps','.ps1','.rar','.tmp','.bas','.bat','.chm','.cmd','.com','.cpl','.crt','.dll','.exe','.hta','.js','.lnk','.msc','.ocx','.pcd','.pif','.pot','.reg','.scr','.sct','.sys','.url','.vb','.vbe','.vbs','.wsc','.wsf','.wsh','.ct','.t','.input','.war','.jsp','.php','.asp','.aspx','.doc','.docx','.pdf','.xls','.xlsx','.ppt','.pptx','.tmp','.log','.dump','.pwd','.w','.txt','.conf','.cfg','.conf','.config','.psd1','.psm1','.ps1xml','.clixml','.psc1','.pssc','.pl','.www','.rdp','.jar','.docm','.ace','.job','.temp','.plg','.asm')
+}
 
 # Debug
-$Debug = $False
+$Debug = $Debugging
 
 # Show Help -----------------------------------------------------------
 # No Thunderstorm server 
-if ( $Args.Count -eq 0 -and $ThunderstormServer -eq "" ) {
+if ( $ThunderstormServer -eq "" ) {
     Get-Help $MyInvocation.MyCommand.Definition -Detailed
-    Write-Host -ForegroundColor Yellow 'Note: You must at least define an Thunderstorm server (-ThunderstormServer)'
-    return
+    Write-Host -ForegroundColor Yellow 'Note: You must at least define a Thunderstorm server (-ThunderstormServer)'
+    exit 2
 }
 
+
 # #####################################################################
 # Functions -----------------------------------------------------------
 # #####################################################################
@@ -156,7 +199,8 @@ function Write-Log {
         [Parameter(Position=1, HelpMessage="Log file to write into")]
             [ValidateNotNullOrEmpty()]
             [Alias('SS')]
-            [IO.FileInfo]$LogFile = "thunderstorm-collector.log",
+            [IO.FileInfo]$LogFile = (Join-Path $OutputPath "thunderstorm-collector.log"),
+
 
         [Parameter(Position=3, HelpMessage="Level")]
             [ValidateNotNullOrEmpty()]
@@ -179,7 +223,8 @@ function Write-Log {
     if ( $Level -eq "Warning" ) {
         Write-Warning -Message "$($Indicator) $($Entry)"
     } elseif ( $Level -eq "Error" ) {
-        Write-Host "$($Indicator) $($Entry)" -ForegroundColor Red
+        [Console]::Error.WriteLine("$($Indicator) $($Entry)")
+
     } elseif ( $Level -eq "Debug" -and $Debug -eq $False ) {
         return
     } else {
@@ -187,7 +232,8 @@ function Write-Log {
     }
 
     # Log File
-    if ( $global:NoLog -eq $False ) {
+    if ( -not $global:NoLog ) {
+
         "$(Get-Date -Format 'yyyy-MM-dd HH:mm:ss.fff') $($env:COMPUTERNAME): $Entry" | Out-File -FilePath $LogFile -Append
     }
 }
@@ -210,6 +256,13 @@ Write-Host "=============================================================="
 $DateStamp = Get-Date -f yyyy-MM-dd
 $StartTime = $(Get-Date)
 
+# Validate folder exists
+if (-not (Test-Path -Path $Folder -PathType Container)) {
+    Write-Log "Folder not found: $Folder" -Level "Error"
+    exit 2
+}
+
+
 Write-Log "Started Thunderstorm Collector with PowerShell v$($PSVersionTable.PSVersion)"
 
 # ---------------------------------------------------------------------
@@ -225,96 +278,560 @@ if ( $AutoDetectPlatform -ne "" ) {
 }
 
 # URL Creation
+$SourceParam = ""
 if ( $Source -ne "" ) {
     Write-Log "Using Source: $($Source)"
-    $SourceParam = "?Source=$Source"
+    $EncodedSource = [uri]::EscapeDataString($Source)
+    $SourceParam = "?source=$EncodedSource"
 }
-$Url = "http://$($ThunderstormServer):$($ThunderstormPort)/api/checkAsync$($SourceParam)"
+$Protocol = "http"
+if ( $UseSSL ) {
+    $Protocol = "https"
+    # Enforce TLS 1.2+ (required on older .NET / PS versions that default to SSL3/TLS1.0)
+    try {
+        [Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 -bor [Net.SecurityProtocolType]::Tls13
+    } catch {
+        # TLS 1.3 not available on older .NET; fall back to TLS 1.2 only
+        [Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12
+    }
+    Write-Log "HTTPS mode enabled (TLS 1.2+)"
+
+    if ($Insecure) {
+        Write-Log "TLS certificate verification DISABLED (insecure mode)" -Level "Warning"
+        # Use ServerCertificateValidationCallback (works on .NET 4.5+ / PS 3+)
+        try {
+            [System.Net.ServicePointManager]::ServerCertificateValidationCallback = [System.Net.Security.RemoteCertificateValidationCallback]{
+                param($sender, $certificate, $chain, $sslPolicyErrors)
+                return $true
+            }
+        } catch {
+            # Fallback: try legacy ICertificatePolicy for older .NET
+            try {
+                if (-not ([System.Management.Automation.PSTypeName]'TrustAllCertsPolicy').Type) {
+                    Add-Type @"
+using System.Net;
+using System.Net.Security;
+using System.Security.Cryptography.X509Certificates;
+public class TrustAllCertsPolicy : ICertificatePolicy {
+    public bool CheckValidationResult(
+        ServicePoint srvPoint, X509Certificate certificate,
+        WebRequest request, int certificateProblem) { return true; }
+}
+"@
+                }
+                [System.Net.ServicePointManager]::CertificatePolicy = New-Object TrustAllCertsPolicy
+            } catch {
+                Write-Log "Failed to set insecure certificate policy: $_" -Level "Warning"
+            }
+        }
+    } elseif ($CACert -ne "") {
+        if (-not (Test-Path $CACert)) {
+            Write-Log "CA certificate file not found: $CACert" -Level "Error"
+            exit 2
+        }
+        Write-Log "Using custom CA certificate: $CACert"
+        try {
+            # Load custom CA and set up validation callback with hostname verification
+            $script:CustomCACert = New-Object System.Security.Cryptography.X509Certificates.X509Certificate2($CACert)
+            $script:ExpectedHost = $ThunderstormServer
+            if (-not ([System.Management.Automation.PSTypeName]'CustomCACertValidator').Type) {
+                Add-Type @"
+using System;
+using System.Net;
+using System.Net.Security;
+using System.Security.Cryptography.X509Certificates;
+using System.Text.RegularExpressions;
+
+public static class CustomCACertValidator {
+    private static X509Certificate2 _ca;
+    private static string _expectedHost;
+
+    public static void Configure(X509Certificate2 ca, string expectedHost) {
+        _ca = ca;
+        _expectedHost = expectedHost;
+    }
+
+    public static bool ValidateCallback(
+        object sender, X509Certificate certificate,
+        X509Chain chain, SslPolicyErrors sslPolicyErrors) {
+        // If the platform says everything is fine, accept
+        if (sslPolicyErrors == SslPolicyErrors.None) return true;
+
+        X509Certificate2 cert2 = new X509Certificate2(certificate);
+
+        // Build chain with our custom CA
+        X509Chain customChain = new X509Chain();
+        customChain.ChainPolicy.ExtraStore.Add(_ca);
+        customChain.ChainPolicy.VerificationFlags = X509VerificationFlags.AllowUnknownCertificateAuthority;
+        customChain.ChainPolicy.RevocationMode = X509RevocationMode.NoCheck;
+        bool chainValid = customChain.Build(cert2);
+        if (!chainValid) return false;
+
+        // Verify the chain actually roots at our CA
+        bool rootedAtCA = false;
+        foreach (var element in customChain.ChainElements) {
+            if (element.Certificate.Thumbprint == _ca.Thumbprint) {
+                rootedAtCA = true;
+                break;
+            }
+        }
+        if (!rootedAtCA) return false;
+
+        // Hostname verification: check SAN and CN
+        if (!MatchesHost(cert2, _expectedHost)) return false;
+
+        return true;
+    }
+
+    private static bool MatchesHost(X509Certificate2 cert, string host) {
+        // Check Subject Alternative Names (OID 2.5.29.17)
+        foreach (var ext in cert.Extensions) {
+            if (ext.Oid.Value == "2.5.29.17") {
+                string san = ext.Format(true);
+                // Parse DNS Name entries
+                foreach (string line in san.Split(new char[]{'\r','\n'}, StringSplitOptions.RemoveEmptyEntries)) {
+                    string trimmed = line.Trim();
+                    if (trimmed.StartsWith("DNS Name=", StringComparison.OrdinalIgnoreCase)) {
+                        string dnsName = trimmed.Substring(9).Trim();
+                        if (HostMatchesPattern(host, dnsName)) return true;
+                    }
+                    // Also handle "DNS:" format
+                    if (trimmed.StartsWith("DNS:", StringComparison.OrdinalIgnoreCase)) {
+                        string dnsName = trimmed.Substring(4).Trim();
+                        if (HostMatchesPattern(host, dnsName)) return true;
+                    }
+                }
+            }
+        }
+        // Fallback to CN in Subject
+        string subject = cert.Subject;
+        var match = Regex.Match(subject, @"CN\s*=\s*([^,]+)");
+        if (match.Success) {
+            string cn = match.Groups[1].Value.Trim();
+            if (HostMatchesPattern(host, cn)) return true;
+        }
+        return false;
+    }
+
+    private static bool HostMatchesPattern(string host, string pattern) {
+        if (string.Equals(host, pattern, StringComparison.OrdinalIgnoreCase))
+            return true;
+        // Wildcard matching: *.example.com matches foo.example.com
+        if (pattern.StartsWith("*.")) {
+            string suffix = pattern.Substring(1); // .example.com
+            int dotIndex = host.IndexOf('.');
+            if (dotIndex > 0) {
+                string hostSuffix = host.Substring(dotIndex);
+                if (string.Equals(hostSuffix, suffix, StringComparison.OrdinalIgnoreCase))
+                    return true;
+            }
+        }
+        return false;
+    }
+}
+"@
+            }
+            [CustomCACertValidator]::Configure($script:CustomCACert, $script:ExpectedHost)
+            [System.Net.ServicePointManager]::ServerCertificateValidationCallback = [System.Net.Security.RemoteCertificateValidationCallback]([CustomCACertValidator].GetMethod('ValidateCallback'))
+        } catch {
+            Write-Log "Failed to configure custom CA certificate: $_" -Level "Error"
+            exit 2
+        }
+    }
+}
+$BaseUrl = "$($Protocol)://$($ThunderstormServer):$($ThunderstormPort)"
+$Url = "$BaseUrl/api/checkAsync$($SourceParam)"
 Write-Log "Sending to URI: $($Url)" -Level "Debug"
+$ScanId = ""
+
+function Send-CollectionMarker {
+    # Posts a collection marker as JSON to "$BaseUrl/api/collection" and returns
+    # the scan_id field of the server's JSON reply.
+    #   -MarkerType  value sent as "type" (callers pass begin/end/interrupted)
+    #   -ScanId      sent as "scan_id" when non-empty
+    #   -Stats       sent as "stats" when supplied
+    #   -Fatal       rethrow failures other than HTTP 404/501 after logging them
+    # Returns "" when the server answers 404/501 or a non-fatal request fails.
+    param(
+        [string]$MarkerType,
+        [string]$ScanId = "",
+        [hashtable]$Stats = $null,
+        [switch]$Fatal = $False
+    )
+    $Payload = @{
+        type      = $MarkerType
+        source    = $Source
+        collector = "powershell3/1.0"
+        timestamp = (Get-Date).ToUniversalTime().ToString("yyyy-MM-ddTHH:mm:ssZ")
+    }
+    if ($ScanId) { $Payload["scan_id"] = $ScanId }
+    if ($Stats)  { $Payload["stats"]   = $Stats  }
+
+    try {
+        # ConvertTo-Json takes care of escaping every character, control chars included
+        $Reply = Invoke-WebRequest -Uri "$BaseUrl/api/collection" -Method Post `
+            -ContentType "application/json" -Body ($Payload | ConvertTo-Json -Compress) `
+            -UseBasicParsing -TimeoutSec 10 -ErrorAction Stop
+        return ($Reply.Content | ConvertFrom-Json).scan_id
+    } catch {
+        $Code = $null
+        if ($_.Exception.Response) { $Code = $_.Exception.Response.StatusCode.value__ }
+        # 404/501: the server has no collection-marker endpoint -- never fatal
+        if ($Code -eq 404 -or $Code -eq 501) {
+            Write-Log "Collection marker endpoint not supported by server (HTTP $Code)" -Level "Debug"
+            return ""
+        }
+        # Any other failure is logged; -Fatal hands it back to the caller
+        Write-Log "Collection marker '$MarkerType' failed: $_" -Level "Warning"
+        if ($Fatal) { throw $_ }
+        return ""
+    }
+}
 
 # ---------------------------------------------------------------------
 # Run THOR Thunderstorm Collector -------------------------------------
 # ---------------------------------------------------------------------
 $ProgressPreference = "SilentlyContinue"
+$FilesScanned = 0
+$FilesSubmitted = 0
+$FilesSkipped = 0
+$FilesFailed = 0
+# Use a reference object for cross-runspace signal communication
+$script:InterruptSignal = New-Object PSObject -Property @{ Value = $False }
+$global:Interrupted = $False
+
+# Progress reporting: auto-detect TTY unless overridden
+$ShowProgress = $False
+if ($Progress) {
+    $ShowProgress = $True
+} elseif ($NoProgress) {
+    $ShowProgress = $False
+} else {
+    try {
+        # Auto-detect: show progress if stdout is a terminal
+        if ([Environment]::UserInteractive -and [Console]::WindowWidth -gt 0) {
+            $ShowProgress = $True
+        }
+    } catch {
+        $ShowProgress = $False
+    }
+}
+$TotalFiles = 0
+if ($ShowProgress) {
+    # Pre-count files for progress percentage (best effort)
+    # Skip pre-count for root/large directories to avoid long startup delay
+    $SkipPreCount = $False
+    try {
+        $FolderNormalized = (Resolve-Path $Folder -ErrorAction SilentlyContinue).Path
+        # Skip pre-count for drive roots (e.g. C:\, D:\)
+        if ($FolderNormalized -match '^[A-Za-z]:\\?$') {
+            $SkipPreCount = $True
+        }
+    } catch {
+        $SkipPreCount = $True
+    }
+    if (-not $SkipPreCount) {
+        try {
+            $PreCountErrors = @()
+            $TotalFiles = @(Get-ChildItem -Path $Folder -Recurse -Force -ErrorAction SilentlyContinue -ErrorVariable PreCountErrors | Where-Object { -not $_.PSIsContainer }).Count
+        } catch {
+            $TotalFiles = 0
+        }
+    }
+}
+
+
+# Register handler for Ctrl+C (SIGINT) using a C# helper for static event subscription
+[Console]::TreatControlCAsInput = $False
 try {
-    Get-ChildItem -Path $Folder -File -Recurse -ErrorAction SilentlyContinue |
-    ForEach-Object {
+    if (-not ([System.Management.Automation.PSTypeName]'SigIntHandler').Type) {
+        Add-Type @"
+using System;
+public static class SigIntHandler {
+    public static volatile bool Interrupted = false;
+    private static bool _registered = false;
+    public static void Register() {
+        if (_registered) return;
+        _registered = true;
+        Console.CancelKeyPress += delegate(object sender, ConsoleCancelEventArgs e) {
+            e.Cancel = true;
+            Interrupted = true;
+        };
+    }
+}
+"@
+    }
+    [SigIntHandler]::Register()
+} catch {
+    # CancelKeyPress registration not available on all platforms (e.g. non-interactive)
+    Write-Log "SIGINT handler registration not available: $_" -Level "Debug"
+}
+
+
+# Send collection begin marker (with single retry after 2s on connection failure)
+$ScanId = ""
+$BeginMarkerSuccess = $False
+try {
+    $ScanId = Send-CollectionMarker -MarkerType "begin" -Fatal
+    $BeginMarkerSuccess = $True
+} catch {
+    # Connection error (no HTTP response) or HTTP error from a reachable server?
+    $BeginHttpStatus = $null
+    $BeginWebException = $null
+    if ($_.Exception -is [System.Net.WebException]) {
+        $BeginWebException = $_.Exception
+    } elseif ($_.Exception.InnerException -is [System.Net.WebException]) {
+        $BeginWebException = $_.Exception.InnerException
+    }
+    if ($BeginWebException -and $BeginWebException.Response) {
+        $BeginHttpStatus = [int]$BeginWebException.Response.StatusCode
+    } elseif (-not $BeginWebException -and $_.Exception.Response) {
+        # PowerShell 6+: Invoke-WebRequest throws HttpResponseException, which also carries .Response
+        $BeginHttpStatus = [int]$_.Exception.Response.StatusCode
+    }
+    # No HTTP status, or a transport-level WebException status, means connection failure
+    $IsConnectionFailure = ($null -eq $BeginHttpStatus -or $BeginHttpStatus -eq 0)
+    if (-not $IsConnectionFailure -and $BeginWebException) {
+        $WeStatus = $BeginWebException.Status
+        if ($WeStatus -eq [System.Net.WebExceptionStatus]::ConnectFailure -or
+            $WeStatus -eq [System.Net.WebExceptionStatus]::NameResolutionFailure -or
+            $WeStatus -eq [System.Net.WebExceptionStatus]::Timeout -or
+            $WeStatus -eq [System.Net.WebExceptionStatus]::ConnectionClosed -or
+            $WeStatus -eq [System.Net.WebExceptionStatus]::SendFailure) {
+            $IsConnectionFailure = $True
+        }
+    }
+    if ($IsConnectionFailure) {
+        Write-Log "Begin marker failed (connection error), retrying in 2 seconds..." -Level "Warning"
+        Start-Sleep -Seconds 2
+        try {
+            $ScanId = Send-CollectionMarker -MarkerType "begin" -Fatal
+            $BeginMarkerSuccess = $True
+        } catch {
+            Write-Log "Cannot connect to Thunderstorm server at $BaseUrl : $_" -Level "Error"
+            exit 2
+        }
+    } else {
+        # Server is reachable but returned an HTTP error -- log warning and continue without scan_id
+        Write-Log "Begin marker returned HTTP $BeginHttpStatus -- continuing without scan_id" -Level "Warning"
+    }
+}
+if ($ScanId) {
+    Write-Log "Collection scan_id: $ScanId"
+    if ($Url.Contains("?")) {
+        $Url = "$Url&scan_id=$([uri]::EscapeDataString($ScanId))"
+    } else {
+        $Url = "$Url`?scan_id=$([uri]::EscapeDataString($ScanId))"
+    }
+}
+
+
+
+$EnumErrors = @()
+try {
+    $FileList = @(Get-ChildItem -Path $Folder -Recurse -Force -ErrorAction SilentlyContinue -ErrorVariable EnumErrors | Where-Object { -not $_.PSIsContainer })
+    # Set total file count from actual enumeration for accurate progress reporting
+    if ($ShowProgress) {
+        $TotalFiles = $FileList.Count
+    }
+    if ($EnumErrors.Count -gt 0) {
+        foreach ($enumErr in $EnumErrors) {
+            Write-Log "Traversal error: $($enumErr.Exception.Message)" -Level "Warning"
+        }
+        Write-Log "Directory traversal encountered $($EnumErrors.Count) error(s) - some paths may not have been scanned" -Level "Warning"
+        $FilesFailed += $EnumErrors.Count
+    }
+
+    foreach ($CurrentFile in $FileList) {
+        # Check for interruption (from C# SIGINT handler or direct flag)
+        $SigIntFired = $False
+        try { $SigIntFired = [SigIntHandler]::Interrupted } catch {}
+        if ($SigIntFired -or $global:Interrupted) {
+            $global:Interrupted = $True
+            Write-Log "Interrupted by user signal" -Level "Warning"
+            break
+        }
+
         # -------------------------------------------------------------
         # Filter ------------------------------------------------------
+        $FilesScanned++
+        # Progress reporting
+        if ($ShowProgress -and $TotalFiles -gt 0) {
+            $Pct = [math]::Round(($FilesScanned / $TotalFiles) * 100, 0)
+            Write-Host -NoNewline "`r[${FilesScanned}/${TotalFiles}] ${Pct}%   "
+        }
+
+
         # Size Check
-        if ( ( $_.Length / 1MB ) -gt $($MaxSize) ) {
-            Write-Log "$_ skipped due to size filter" -Level "Debug"
-            return
+        if ( ( $CurrentFile.Length / 1MB ) -gt $($MaxSize) ) {
+            Write-Log "$CurrentFile skipped due to size filter" -Level "Debug"
+            $FilesSkipped++
+            continue
         }
-        # Age Check
-        if ( $($MaxAge) -gt 0 ) {
-            if ( $_.LastWriteTime -lt (Get-Date).AddDays(-$($MaxAge)) ) {
-                Write-Log "$_ skipped due to age filter" -Level "Debug"
-                return
+        # Age Check (file passes if either created or modified within MaxAge days)
+        if ( $MaxAge -gt 0 ) {
+            $AgeThreshold = (Get-Date).AddDays(-$MaxAge)
+            $NewestTime = if ($CurrentFile.CreationTime -gt $CurrentFile.LastWriteTime) { $CurrentFile.CreationTime } else { $CurrentFile.LastWriteTime }
+            if ( $NewestTime -lt $AgeThreshold ) {
+                Write-Log "$CurrentFile skipped due to age filter" -Level "Debug"
+                $FilesSkipped++
+                continue
             }
         }
-        # Extensions Check
-        if ( $Extensions.Length -gt 0 ) {
-            if ( $Extensions -contains $_.extension ) { } else {
-                Write-Log "$_ skipped due to extension filter" -Level "Debug"
-                return
+
+        # Extensions Check (case-insensitive)
+        if ( $ActiveExtensions.Length -gt 0 ) {
+            $FileExt = $CurrentFile.Extension.ToLowerInvariant()
+            if ( -not ($ActiveExtensions -contains $FileExt) ) {
+                Write-Log "$CurrentFile skipped due to extension filter" -Level "Debug"
+                $FilesSkipped++
+                continue
             }
         }
 
+
         # -------------------------------------------------------------
         # Submission --------------------------------------------------
 
-        Write-Log "Processing $($_.FullName) ..." -Level "Debug"
-        # Reading the file data & preparing the request
+        Write-Log "Processing $($CurrentFile.FullName) ..." -Level "Debug"
+        $boundary = "----ThunderstormBoundary" + [System.Guid]::NewGuid().ToString("N")
+
+        $CRLF = "`r`n"
+        $SafeFileName = $CurrentFile.FullName -replace '[\r\n]','' -replace '"','\"'
+
+        # File part — the full path goes in the Content-Disposition filename
+        $headerText = "--$boundary$CRLF" +
+            "Content-Disposition: form-data; name=`"file`"; filename=`"$SafeFileName`"$CRLF" +
+            "Content-Type: application/octet-stream$CRLF$CRLF"
+
+        $footerText = "$CRLF--$boundary--$CRLF"
+
+        $headerBytes = [System.Text.Encoding]::UTF8.GetBytes($headerText)
+        $footerBytes = [System.Text.Encoding]::UTF8.GetBytes($footerText)
+
+        # Pre-check file readability before attempting upload
+        $fileLength = 0
         try {
-            $fileBytes = [System.IO.File]::ReadAllBytes("$($_.FullName)");
+            $fileLength = $CurrentFile.Length
+            # Quick open/close to verify readability
+            $testStream = [System.IO.File]::OpenRead($CurrentFile.FullName)
+            $testStream.Dispose()
         } catch {
             Write-Log "Read Error: $_" -Level "Error"
+            $FilesFailed++
+            continue
         }
-        $boundary = [System.Guid]::NewGuid().ToString();
-        $LF = "`r`n";
-
-       # Build header and footer as byte arrays
-        $headerStr = "--$boundary$LF" +
-                     "Content-Disposition: form-data; name=`"file`"; filename=`"$($_.FullName)`"$LF" +
-                     "Content-Type: application/octet-stream$LF$LF"
-        $footerStr = "$LF--$boundary--$LF"
-
-        $headerBytes = [System.Text.Encoding]::UTF8.GetBytes($headerStr)
-        $footerBytes = [System.Text.Encoding]::UTF8.GetBytes($footerStr)
-
-        # Construct the request body without re-encoding the raw file data
-        $bodyBytes = New-Object byte[] ($headerBytes.Length + $fileBytes.Length + $footerBytes.Length)
-        [Buffer]::BlockCopy($headerBytes, 0, $bodyBytes, 0,                                          $headerBytes.Length)
-        [Buffer]::BlockCopy($fileBytes,   0, $bodyBytes, $headerBytes.Length,                        $fileBytes.Length)
-        [Buffer]::BlockCopy($footerBytes, 0, $bodyBytes, $headerBytes.Length + $fileBytes.Length,    $footerBytes.Length)
 
         # Submitting the request
         $StatusCode = 0
         $Retries = 0
-        while ( $($StatusCode) -ne 200 ) {
+        $MaxRetries = 3
+        $Max503Retries = 10
+        $Retries503 = 0
+
+        while ( $StatusCode -lt 200 -or $StatusCode -ge 300 ) {
+            $fileStream = $null
+            $requestStream = $null
             try {
-                Write-Log "Submitting to Thunderstorm server: $($_.FullName) ..." -Level "Info"
-                $Response = Invoke-WebRequest -uri $($Url) -Method Post -ContentType "multipart/form-data; boundary=`"$boundary`"" -Body $bodyBytes
-                $StatusCode = [int]$Response.StatusCode
+                Write-Log "Submitting to Thunderstorm server: $($CurrentFile.FullName) ..." -Level "Info"
+
+                # Stream the multipart body directly to the request to avoid double-buffering
+                $ContentLength = $headerBytes.Length + $fileLength + $footerBytes.Length
+                $WebRequest = [System.Net.HttpWebRequest]::Create($Url)
+                $WebRequest.Method = "POST"
+                $WebRequest.ContentType = "multipart/form-data; boundary=$boundary"
+                $WebRequest.ContentLength = $ContentLength
+                $WebRequest.Timeout = 300000
+                $WebRequest.AllowWriteStreamBuffering = $False
+
+                $requestStream = $WebRequest.GetRequestStream()
+                
+                $requestStream.Write($headerBytes, 0, $headerBytes.Length)
+
+                # Stream file content directly to request stream
+                $fileStream = [System.IO.File]::OpenRead($CurrentFile.FullName)
+                $copyBuffer = New-Object byte[] 81920
+                $bytesRead = 0
+                while (($bytesRead = $fileStream.Read($copyBuffer, 0, $copyBuffer.Length)) -gt 0) {
+                    $requestStream.Write($copyBuffer, 0, $bytesRead)
+                }
+                $fileStream.Dispose()
+                $fileStream = $null
+
+                $requestStream.Write($footerBytes, 0, $footerBytes.Length)
+                $requestStream.Dispose()
+                $requestStream = $null
+
+                $WebResponse = $WebRequest.GetResponse()
+                $StatusCode = [int]$WebResponse.StatusCode
+                $WebResponse.Close()
+                $FilesSubmitted++
             }
-            # Catch all non 200 status codes
             catch {
-                $StatusCode = $_.Exception.Response.StatusCode.value__
+                if ($fileStream) { try { $fileStream.Dispose() } catch {} }
+                if ($requestStream) { try { $requestStream.Dispose() } catch {} }
+
+                $ErrorResponse = $null
+                $StatusCode = 0
+                if ($_.Exception -is [System.Net.WebException]) {
+                    $ErrorResponse = $_.Exception.Response
+                    if ($ErrorResponse) {
+                        $StatusCode = [int]$ErrorResponse.StatusCode
+                    }
+                } elseif ($_.Exception.InnerException -is [System.Net.WebException]) {
+                    $ErrorResponse = $_.Exception.InnerException.Response
+                    if ($ErrorResponse) {
+                        $StatusCode = [int]$ErrorResponse.StatusCode
+                    }
+                }
+
                 if ( $StatusCode -eq 503 ) {
+                    $Retries503 = $Retries503 + 1
+                    # Reset non-503 retry counter since server is reachable (just busy)
+                    $Retries = 0
+                    if ( $Retries503 -ge $Max503Retries ) {
+                        $FilesFailed++
+                        Write-Log "503: Server still busy after $Max503Retries retries - giving up on $($CurrentFile.FullName)" -Level "Warning"
+                        break
+                    }
                     $WaitSecs = 3
-                    if ( $_.Exception.Response.Headers['Retry-After'] ) {
-                        $WaitSecs = [int]$_.Exception.Response.Headers['Retry-After']
+                    try {
+                        $RetryAfterVal = $null
+                        if ($ErrorResponse) {
+                            $RetryAfterVal = $ErrorResponse.Headers['Retry-After']
+                        }
+                        if ($RetryAfterVal) {
+                            $WaitSecs = [int]$RetryAfterVal
+                            if ($WaitSecs -lt 1) { $WaitSecs = 3 }
+                            if ($WaitSecs -gt 300) { $WaitSecs = 300 }
+                        }
+                    } catch {
+                        $WaitSecs = 3
                     }
-                    Write-Log "503: Server seems busy - retrying in $($WaitSecs) seconds"
+
+                    Write-Log "503: Server seems busy - retrying in $($WaitSecs) seconds ($Retries503/$Max503Retries)"
                     Start-Sleep -Seconds $($WaitSecs)
-                } else {
-                    if ( $Retries -eq 3) {
-                        Write-Log "$($StatusCode): Server still has problems - giving up"
+                } elseif ( $StatusCode -eq 0 ) {
+                    # Connection/transport error (no HTTP response)
+                    $Retries = $Retries + 1
+                    if ( $Retries -gt $MaxRetries ) {
+                        $FilesFailed++
+                        Write-Log "Connection error: giving up on $($CurrentFile.FullName) after $MaxRetries retries - $_" -Level "Warning"
                         break
                     }
+                    $SleepTime = [Math]::Pow(2, $Retries)
+                    Write-Log "Connection error - retrying in $SleepTime seconds ($Retries/$MaxRetries): $_"
+                    Start-Sleep -Seconds $($SleepTime)
+                } else {
                     $Retries = $Retries + 1
-                    $SleepTime = 2 * [Math]::Pow(2, $Retries)
+                    if ( $Retries -gt $MaxRetries ) {
+                        $FilesFailed++
+                        Write-Log "$($StatusCode): Server still has problems - giving up on $($CurrentFile.FullName)" -Level "Warning"
+                        break
+                    }
+
+                    $SleepTime = [Math]::Pow(2, $Retries)
+
                     Write-Log "$($StatusCode): Server has problems - retrying in $SleepTime seconds"
                     Start-Sleep -Seconds $($SleepTime)
                 }
@@ -322,12 +839,60 @@ try {
         }
      }
 } catch {
-    Write-Log "Unknown error during Thunderstorm Collection $_" -Level "Error"
+    Write-Log "Fatal error during Thunderstorm Collection: $_" -Level "Error"
+    # Send interrupted marker on fatal error
+    try {
+        Send-CollectionMarker -MarkerType "interrupted" -ScanId $ScanId -Stats @{
+            scanned   = $FilesScanned
+            submitted = $FilesSubmitted
+            skipped   = $FilesSkipped
+            failed    = $FilesFailed
+        } | Out-Null
+    } catch {
+        Write-Log "Failed to send interrupted marker: $_" -Level "Warning"
+    }
+    exit 2
 }
 
+
+
 # ---------------------------------------------------------------------
 # End -----------------------------------------------------------------
 # ---------------------------------------------------------------------
+# Clear progress line if active
+if ($ShowProgress -and $TotalFiles -gt 0) {
+    Write-Host "`r$(' ' * 40)`r" -NoNewline
+}
 $ElapsedTime = $(get-date) - $StartTime
+
 $TotalTime = "{0:HH:mm:ss}" -f ([datetime]$elapsedTime.Ticks)
-Write-Log "Scan took $($TotalTime) to complete" -Level "Information"
+Write-Log "Scan took $($TotalTime) to complete" -Level "Info"
+Write-Log "Results: scanned=$FilesScanned submitted=$FilesSubmitted skipped=$FilesSkipped failed=$FilesFailed"
+
+# Send collection marker with stats
+$EndStats = @{
+    scanned          = $FilesScanned
+    submitted        = $FilesSubmitted
+    skipped          = $FilesSkipped
+    failed           = $FilesFailed
+    elapsed_seconds  = [int]$ElapsedTime.TotalSeconds
+}
+
+$SigIntFired = $False
+try { $SigIntFired = [SigIntHandler]::Interrupted } catch {}
+if ($SigIntFired -or $global:Interrupted) {
+    $global:Interrupted = $True
+    Send-CollectionMarker -MarkerType "interrupted" -ScanId $ScanId -Stats $EndStats | Out-Null
+    Write-Log "Collection was interrupted by user" -Level "Warning"
+    exit 1
+} else {
+
+    Send-CollectionMarker -MarkerType "end" -ScanId $ScanId -Stats $EndStats | Out-Null
+}
+
+# Exit with appropriate code
+if ($FilesFailed -gt 0) {
+    exit 1
+} else {
+    exit 0
+}
diff --git a/scripts/thunderstorm-collector.py b/scripts/thunderstorm-collector.py
index 7c207c6..e4d82cd 100755
--- a/scripts/thunderstorm-collector.py
+++ b/scripts/thunderstorm-collector.py
@@ -1,18 +1,26 @@
 #!/usr/bin/env python3
+# Minimum Python version: 3.4 (no f-strings, no 3.6+ features)
 
 import argparse
 import http.client
+import json
 import os
 import re
+import signal
 import ssl
+import sys
 import time
 import uuid
 import socket
+from urllib.parse import quote
 
 # Configuration
 schema = "http"
-max_age = 14  # in days
-max_size = 20  # in megabytes
+max_age = 14  # in days (overridden by --max-age)
+max_size = 2048  # in KB (overridden by --max-size-kb)
+sync_mode = False
+dry_run = False
+retries = 3
 skip_elements = [
     r"^\/proc",
     r"^\/mnt",
@@ -24,7 +32,57 @@
     r"\.vmsd$",
     r"\.lck$",
 ]
-hard_skips = ["/proc", "/dev", "/sys"]
+hard_skips = [
+    "/proc", "/dev", "/sys", "/run",
+    "/snap", "/.snapshots",
+    "/sys/kernel/debug", "/sys/kernel/slab", "/sys/kernel/tracing",
+]
+
+# Network and special filesystem types to exclude via /proc/mounts
+NETWORK_FS_TYPES = {"nfs", "nfs4", "cifs", "smbfs", "smb3", "sshfs", "fuse.sshfs",
+                    "afp", "webdav", "davfs2", "fuse.rclone", "fuse.s3fs"}
+SPECIAL_FS_TYPES = {"proc", "procfs", "sysfs", "devtmpfs", "devpts",
+                    "cgroup", "cgroup2", "pstore", "bpf", "tracefs", "debugfs",
+                    "securityfs", "hugetlbfs", "mqueue", "autofs",
+                    "fusectl", "rpc_pipefs", "nsfs", "configfs", "binfmt_misc",
+                    "selinuxfs", "efivarfs", "ramfs"}
+
+# Cloud storage folder names (lowercase for comparison)
+CLOUD_DIR_NAMES = {"onedrive", "dropbox", ".dropbox", "googledrive", "google drive",
+                   "icloud drive", "iclouddrive", "nextcloud", "owncloud", "mega",
+                   "megasync", "tresorit", "tresorit drive", "syncthing"}
+
+
+def get_excluded_mounts():
+    """Mount points of network/special filesystems per /proc/mounts ([] if unreadable)."""
+    excluded = []
+    try:
+        with open("/proc/mounts", "r") as f:
+            for line in f:
+                parts = line.split()
+                if len(parts) >= 3 and (parts[2] in NETWORK_FS_TYPES or parts[2] in SPECIAL_FS_TYPES):
+                    # /proc/mounts octal-escapes whitespace/backslash (space -> \040); decode it
+                    mount_point = re.sub(r"\\([0-7]{3})", lambda m: chr(int(m.group(1), 8)), parts[1])
+                    excluded.append(mount_point)
+    except (IOError, OSError):
+        pass  # best effort: without a readable /proc/mounts nothing is excluded
+    return excluded
+
+
+def is_cloud_path(filepath):
+    """Tell whether ``filepath`` passes through a known cloud-storage folder.
+
+    Path segments are compared case-insensitively against CLOUD_DIR_NAMES;
+    backslashes count as separators."""
+    lowered = filepath.lower()
+    dynamic_prefixes = ("onedrive - ", "onedrive-", "nextcloud-")
+    for part in lowered.replace("\\", "/").split("/"):
+        # Fixed names, or dynamic ones like "onedrive - orgname" / "nextcloud-account"
+        if part in CLOUD_DIR_NAMES or part.startswith(dynamic_prefixes):
+            return True
+    # macOS: ~/Library/CloudStorage
+    return "/library/cloudstorage" in lowered
+
 
 # Composed values
 current_date = time.time()
@@ -32,148 +90,419 @@
 # Stats
 num_submitted = 0
 num_processed = 0
+num_failed = 0
+total_files_estimate = 0
+upload_in_flight = None  # Path of file currently being uploaded, or None
 
 # URL to use for submission
 api_endpoint = ""
 
+# scan_id at module level for signal handler
+scan_id = None
+
 # Original args
-args = {}
+args = None
+
+# Progress reporting
+show_progress = None  # None = auto-detect TTY
+
+
+def print_error(msg):
+    """Write msg plus a trailing newline to stderr and flush right away."""
+    sys.stderr.write("".join((msg, "\n")))
+    sys.stderr.flush()
+
+
+def print_progress(processed, total):
+    """Redraw the one-line progress indicator on stderr, if enabled.
+    Counts files examined (not just submitted); falls back to a bare counter
+    when total is zero or the estimate has been exceeded."""
+    if not show_progress:
+        return
+    if total > 0 and processed <= total:
+        line = "\r[{}/{} examined] {}%".format(processed, total, min(100, int(processed * 100 / total)))
+    else:
+        line = "\r[{} examined]".format(processed)
+    sys.stderr.write(line)
+    sys.stderr.flush()
+
+
+def is_under_excluded(path):
+    """Report whether path equals a hard_skips entry or lies beneath one.
+
+    Only path is normalized here; hard_skips entries are compared as stored."""
+    candidate = os.path.normpath(path)
+    return any(candidate == skip or candidate.startswith(skip + os.sep)
+               for skip in hard_skips)
+
+
+def should_prune_dir(dirpath, dirname):
+    """Decide whether the directory walk should skip the child dirname of dirpath.
+
+    A child is pruned when it is a symlink, is (or lies under) a hard_skips
+    entry, or has a cloud-storage folder name anywhere in its joined path."""
+    child = os.path.join(dirpath, dirname)
+    # Evaluated in this order; the first check that matches decides.
+    return (os.path.islink(child)
+            or is_under_excluded(child)
+            or is_cloud_path(child))
+
+
+def count_files(dirs):
+    """Estimate how many files a scan of dirs will examine.
+
+    Walks each root with the same directory pruning as the real scan and
+    counts every non-symlink file entry. Used for progress reporting."""
+    total = 0
+    for root in dirs:
+        for parent, subdirs, files in os.walk(root, followlinks=False):
+            # In-place slice assignment stops os.walk from descending further.
+            subdirs[:] = [s for s in subdirs if not should_prune_dir(parent, s)]
+            total += sum(
+                1 for entry in files
+                if not os.path.islink(os.path.join(parent, entry))
+            )
+    return total
 
 # Functions
-def process_dir(workdir):
-    startdir = os.getcwd()
-    os.chdir(workdir)
+def send_interrupted_marker():
+    """Best-effort: POST an "interrupted" collection marker with current stats.
+
+    Reads the module-level counters, scan_id and parsed args. Any failure is
+    swallowed on purpose, because this runs while the process is shutting down."""
+    try:
+        stats = {
+            "scanned": num_processed,
+            "submitted": num_submitted,
+            "failed": num_failed,
+            "elapsed_seconds": int(time.time() - current_date),
+        }
+        if upload_in_flight is not None:
+            stats["in_flight"] = upload_in_flight
+        collection_marker(
+            args.server, args.port, args.tls, args.insecure,
+            args.source, "0.1",
+            "interrupted",
+            scan_id=scan_id,
+            ca_cert=getattr(args, 'ca_cert', None),
+            stats=stats,
+        )
+    except Exception:
+        pass
 
-    for name in os.listdir("."):
-        filepath = os.path.join(workdir, name)
 
-        # Hard skips
-        if filepath in hard_skips:
-            continue
+def signal_handler(signum, frame):
+    """SIGINT/SIGTERM handler: send the interrupted marker, then exit with status 1."""
+    print_error("\n[INFO] Signal received, sending interrupted marker...")
+    send_interrupted_marker()
+    raise SystemExit(1)
 
-        # Skip symlinks
-        # TODO: revisit on how to upload symlinks to thunderstorm
-        if os.path.islink(filepath):
-            continue
 
-        # Directory
-        if os.path.isdir(filepath):
-            process_dir(filepath)
-            continue
+def process_dir(workdir):
 
-        # File
-        if args.debug:
-            print("[DEBUG] Checking {} ...".format(filepath))
+    for dirpath, dirnames, filenames in os.walk(workdir, followlinks=False):
+        # Prune skipped directories in place so os.walk never descends into them
+        dirnames[:] = [
+            d for d in dirnames
+            if not should_prune_dir(dirpath, d)
+        ]
 
-        # Count
-        global num_processed
-        num_processed += 1
+        for name in filenames:
+            filepath = os.path.join(dirpath, name)
 
-        # Skip files
-        if skip_file(filepath):
-            continue
+            # Skip symlinked files (they are neither counted nor uploaded)
+            if os.path.islink(filepath):
+                continue
+
+            if args.debug:
+                print_error("[DEBUG] Checking {} ...".format(filepath))
 
-        # Submit
-        submit_sample(filepath)
+            # Count every non-symlink file examined, including ones skipped below
+            global num_processed
+            num_processed += 1
 
-    os.chdir(startdir)
+            # Progress (a no-op unless progress reporting is enabled)
+            print_progress(num_processed, total_files_estimate)
+
+            # Apply skip rules; the stat result is handed on to the upload
+            skip, file_stat = skip_file(filepath)
+            if skip:
+                continue
+
+            # Submit
+            submit_sample(filepath, file_stat)
 
 
 def skip_file(filepath):
+    """Return (True, None) to skip filepath (pattern, unreadable, non-regular, size, age), else (False, stat_result)."""
+    import stat  # function-scope import: only needed for the file-type check below
     # Regex skips
     for pattern in skip_elements:
         if re.search(pattern, filepath):
             if args.debug:
-                print(
+                print_error(
                     "[DEBUG] Skipping file due to configured skip_file exclusion {}".format(
                         filepath
                     )
                 )
-            return True
+            return True, None
 
-    # Size
-    if os.path.getsize(filepath) > max_size * 1024 * 1024:
+    # Stat the file once to avoid TOCTOU races
+    try:
+        st = os.stat(filepath)
+    except (OSError, IOError):
         if args.debug:
-            print("[DEBUG] Skipping file due to size {}".format(filepath))
-        return True
+            print_error("[DEBUG] Skipping unreadable file {}".format(filepath))
+        return True, None
+    if not stat.S_ISREG(st.st_mode):  # FIFO/socket/device node: open() could block
+        if args.debug:
+            print_error("[DEBUG] Skipping non-regular file {}".format(filepath))
+        return True, None
+    file_size, mtime = st.st_size, st.st_mtime
+    if file_size > max_size * 1024:  # max_size is in KB
+        if args.debug:
+            print_error("[DEBUG] Skipping file due to size {}".format(filepath))
+        return True, None
 
     # Age
-    mtime = os.path.getmtime(filepath)
     if mtime < current_date - (max_age * 86400):
         if args.debug:
-            print("[DEBUG] Skipping file due to age {}".format(filepath))
-        return True
+            print_error("[DEBUG] Skipping file due to age {}".format(filepath))
+        return True, None
 
-    return False
+    return False, st
 
 
-def submit_sample(filepath):
-    print("[SUBMIT] Submitting {} ...".format(filepath))
+def _make_connection(server, port, tls, insecure, ca_cert=None, timeout=30):
+    """Create an http.client connection object for server:port (plain HTTP unless tls).
+    With TLS, insecure turns certificate verification off; otherwise ca_cert,
+    when given, is the CA bundle used instead of the system default."""
+    if not tls:
+        return http.client.HTTPConnection(server, port, timeout=timeout)
+    if insecure:
+        context = ssl._create_unverified_context()
+    else:
+        # cafile=None selects the default trust store.
+        context = ssl.create_default_context(cafile=ca_cert or None)
+    return http.client.HTTPSConnection(server, port, context=context, timeout=timeout)
 
-    headers = {
-        "Content-Type": "application/octet-stream",
-        "Content-Disposition": f"attachment; filename={filepath}",
-    }
 
-    try:
+def submit_sample(filepath, file_stat=None):
+    """Upload filepath to api_endpoint as a streamed multipart/form-data POST, making up
+    to `retries` attempts; updates num_submitted/num_failed (dry-run: log and count only).
+    Exactly file_size bytes are streamed so the body always matches Content-Length."""
+    global num_submitted, num_failed, upload_in_flight
 
-        with open(filepath, "rb") as f:
-            data = f.read()
-
-    except Exception as e:
-        print("[ERROR] Could not read '{}' - {}".format(filepath, e))
+    if dry_run:
+        sys.stderr.write("[DRY-RUN] Would submit {} ...\n".format(filepath))
+        num_submitted += 1
         return
 
+    sys.stderr.write("[SUBMIT] Submitting {} ...\n".format(filepath))
+    upload_in_flight = filepath
+
+    # Get file size for streaming upload (use cached stat if available)
+    if file_stat is not None:
+        file_size = file_stat.st_size
+    else:
+        try:
+            file_size = os.path.getsize(filepath)
+        except (OSError, IOError) as e:
+            print_error("[ERROR] Could not stat '{}' - {}".format(filepath, e))
+            num_failed += 1
+            upload_in_flight = None
+            return
+
     boundary = str(uuid.uuid4())
     headers = {
-        "Content-Type": f"multipart/form-data; boundary={boundary}",
+        "Content-Type": "multipart/form-data; boundary={}".format(boundary),
     }
 
-    # Create multipart/form-data payload
-    payload = (
-        f"--{boundary}\r\n"
-        f'Content-Disposition: form-data; name="file"; filename="{filepath}"\r\n'
-        f"Content-Type: application/octet-stream\r\n\r\n"
-    ).encode("utf-8")
-    payload += data
-    payload += f"\r\n--{boundary}--\r\n".encode("utf-8")
-
-    retries = 0
-    while retries < 3:
-        try:
-            if args.tls:
-                if args.insecure:
-                    context = ssl._create_unverified_context()
-                else:
-                    context = ssl._create_default_https_context()
-                conn = http.client.HTTPSConnection(args.server, args.port, context=context)
-            else:
-                conn = http.client.HTTPConnection(args.server, args.port)
-            conn.request("POST", api_endpoint, body=payload, headers=headers)
+    # Sanitize filename for multipart header safety
+    safe_filename = filepath.replace('\\', '/').replace('"', '_').replace(';', '_').replace('\r', '_').replace('\n', '_').replace('\x00', '_')
 
-            resp = conn.getresponse()
+    # Build multipart preamble (file part header) and epilogue
+    preamble = (
+        "--{boundary}\r\n"
+        "Content-Disposition: form-data; name=\"file\"; filename=\"{filename}\"\r\n"
+        "Content-Type: application/octet-stream\r\n\r\n"
+    ).format(boundary=boundary, filename=safe_filename).encode("utf-8")
+    epilogue = "\r\n--{}--\r\n".format(boundary).encode("utf-8")
 
+    headers["Content-Length"] = str(len(preamble) + file_size + len(epilogue))
+
+    CHUNK_SIZE = 65536
+
+    attempt = 0
+    while attempt < retries:
+        resp_status = None
+        resp_reason = None
+        resp_retry_after = None
+        file_fully_sent = False
+        conn = None
+        try:
+            conn = _make_connection(
+                args.server, args.port, args.tls, args.insecure,
+                ca_cert=getattr(args, 'ca_cert', None)
+            )
+            conn.putrequest("POST", api_endpoint)
+            for hdr_name, hdr_val in headers.items():
+                conn.putheader(hdr_name, hdr_val)
+            conn.endheaders()
+            # Send preamble (file part header)
+            conn.send(preamble)
+            # Stream exactly file_size bytes: a file that grew is cut at the announced size
+            with open(filepath, "rb") as f:
+                remaining = file_size
+                while remaining > 0:
+                    chunk = f.read(min(CHUNK_SIZE, remaining))
+                    if not chunk:
+                        raise IOError("file shrank during upload ({} bytes missing)".format(remaining))
+                    conn.send(chunk)
+                    remaining -= len(chunk)
+            # Send epilogue
+            conn.send(epilogue)
+            file_fully_sent = True
+            resp = conn.getresponse()
+            # Read response body to allow connection reuse / proper close
+            resp.read()
+            # Store response info before closing connection
+            resp_status = resp.status
+            resp_reason = resp.reason
+            resp_retry_after = resp.getheader("Retry-After", "30")
         except Exception as e:
-            print("[ERROR] Could not submit '{}' - {}".format(filepath, e))
-            retries += 1
-            time.sleep(2 << retries)
+            print_error("[ERROR] Could not submit '{}' - {}".format(filepath, e))
+            attempt += 1
+            if attempt < retries:
+                backoff = min(2 ** (attempt - 1), 60)
+                time.sleep(backoff)
             continue
+        finally:
+            if conn is not None:
+                try:
+                    conn.close()
+                except Exception:
+                    pass
 
         # pylint: disable=no-else-continue
-        if resp.status == 503: # Service unavailable
-            retry_time = resp.headers.get("Retry-After", 30)
+        if resp_status == 503:  # Service unavailable
+            attempt += 1
+            if attempt >= retries:
+                print_error("[ERROR] Server busy after {} retries, giving up on '{}'".format(retries, filepath))
+                num_failed += 1
+                upload_in_flight = None
+                return
+            try:
+                retry_time = max(0, min(int(resp_retry_after), 300))  # Clamp to 0-300s
+            except (ValueError, TypeError):
+                retry_time = 30
             time.sleep(retry_time)
             continue
-        elif resp.status == 200:
-            break
-        print(
-            "[ERROR] HTTP return status: {}, reason: {}".format(
-                resp.status, resp.reason
+        elif 200 <= resp_status < 300:
+            if file_fully_sent:
+                num_submitted += 1
+            else:
+                print_error("[ERROR] File '{}' was not fully sent but server returned {}".format(filepath, resp_status))
+                num_failed += 1
+            upload_in_flight = None
+            return
+        else:
+            print_error(
+                "[ERROR] HTTP return status: {}, reason: {}".format(
+                    resp_status, resp_reason
+                )
             )
-        )
+            attempt += 1
+            if attempt < retries:
+                backoff = min(2 ** (attempt - 1), 60)
+                time.sleep(backoff)
+            continue
 
-    global num_submitted
-    num_submitted += 1
+    # All retries exhausted
+    num_failed += 1
+    upload_in_flight = None
+
+
+def collection_marker(server, port, tls, insecure, source, collector_version, marker_type, scan_id=None, stats=None, ca_cert=None, retry_on_fail=False):
+    """POST a begin/end/interrupted collection marker to /api/collection.
+
+    Returns the response's scan_id as a string, "" when the marker succeeded
+    without a usable id (or the endpoint is unsupported: HTTP 404/501), or None
+    on failure. If retry_on_fail is True, a failed attempt is retried once after
+    2s (on HTTP 503: after the server's Retry-After, capped at 300s)."""
+    body = {
+        "type": marker_type,
+        "source": source,
+        "hostname": socket.gethostname(),
+        "collector": "python3/{}".format(collector_version),
+        "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
+    }
+    # scan_id: None = failure, "" = success with no id, non-empty = valid id
+    if scan_id:
+        body["scan_id"] = scan_id
+    if stats:
+        body["stats"] = stats
+
+    attempts = 2 if retry_on_fail else 1
+    for attempt in range(attempts):
+        conn = None
+        try:
+            conn = _make_connection(server, port, tls, insecure, ca_cert=ca_cert, timeout=10)
+            payload = json.dumps(body).encode("utf-8")
+            conn.request("POST", "/api/collection", body=payload,
+                         headers={"Content-Type": "application/json"})
+            resp = conn.getresponse()
+            resp_body = resp.read().decode("utf-8", errors="replace")
+            status, retry_after = resp.status, resp.getheader("Retry-After")
+        except Exception as e:
+            if attempt < attempts - 1:
+                print_error("[WARN] Collection marker '{}' failed ({}), retrying in 2s...".format(marker_type, e))
+                time.sleep(2)
+                continue
+            print_error("[ERROR] Collection marker '{}' failed: {}".format(marker_type, e))
+            return None
+        finally:
+            # Runs on every path, so a failed request cannot leak the socket
+            if conn is not None:
+                try:
+                    conn.close()
+                except Exception:
+                    pass
+
+        if 200 <= status < 300:
+            try:
+                data = json.loads(resp_body) if resp_body.strip() else None
+            except ValueError:
+                data = None
+            # Non-JSON/non-object body or missing/null id: success without an id.
+            # Never return None here -- callers treat None as "marker failed".
+            new_id = data.get("scan_id") if isinstance(data, dict) else None
+            return "" if new_id is None else str(new_id)
+        elif status == 503 and attempt < attempts - 1:
+            try:
+                wait_time = min(int(retry_after), 300)
+                if wait_time < 0:
+                    wait_time = 2
+            except (ValueError, TypeError):
+                wait_time = 2
+            print_error("[WARN] Collection marker '{}' got 503, retrying in {}s...".format(marker_type, wait_time))
+            time.sleep(wait_time)
+        elif status == 404 or status == 501:
+            # 404/501 = endpoint not supported, continue without scan_id but success
+            print_error("[WARN] Collection marker '{}' not supported (HTTP {}) — server does not implement /api/collection".format(marker_type, status))
+            return ""
+        elif 400 <= status < 500:
+            # Other client errors (4xx) indicate configuration problems — no retry
+            print_error("[ERROR] Collection marker '{}' returned HTTP {}".format(marker_type, status))
+            return None
+        elif attempt < attempts - 1:
+            # Server errors (5xx other than 503) — retry if retry_on_fail
+            print_error("[WARN] Collection marker '{}' returned HTTP {}, retrying in 2s...".format(marker_type, status))
+            time.sleep(2)
+        else:
+            print_error("[ERROR] Collection marker '{}' returned HTTP {}".format(marker_type, status))
+            return None
+    return None
 
 
 # Main
@@ -185,15 +514,15 @@ def submit_sample(filepath):
     parser.add_argument(
         "-d",
         "--dirs",
-        nargs="*",
-        default="/",
+        nargs="+",
+        default=["/"],
         help="Directories that should be scanned. (Default: /)",
     )
     parser.add_argument(
         "-s", "--server", required=True, help="FQDN/IP of the THOR Thunderstorm server."
     )
     parser.add_argument(
-        "-p", "--port", help="Port of the THOR Thunderstorm server. (Default: 8080)"
+        "-p", "--port", type=int, default=8080, help="Port of the THOR Thunderstorm server. (Default: 8080)"
     )
     parser.add_argument(
         "-t",
@@ -210,48 +539,202 @@ def submit_sample(filepath):
     parser.add_argument(
         "-S",
         "--source",
-        default=socket.gethostname(),
-        help="Source identifier to be used in the Thunderstorm submission.",
+        default=None,
+        help="Source identifier to be used in the Thunderstorm submission. (Default: hostname)",
+    )
+    parser.add_argument(
+        "--max-age", type=int, default=14,
+        help="Max file age in days (default: 14)"
     )
+    parser.add_argument(
+        "--max-size-kb", type=int, default=2048,
+        help="Max file size in KB (default: 2048)"
+    )
+    parser.add_argument(
+        "--sync", action="store_true",
+        help="Use /api/check (synchronous) instead of /api/checkAsync"
+    )
+    parser.add_argument(
+        "--dry-run", action="store_true",
+        help="Do not upload, only show what would be submitted"
+    )
+    parser.add_argument(
+        "--retries", type=int, default=3,
+        help="Retry attempts per file (default: 3)"
+    )
+    parser.add_argument(
+        "--ca-cert",
+        default=None,
+        help="Path to custom CA certificate bundle for TLS verification."
+    )
+    parser.add_argument(
+        "--progress",
+        action="store_true",
+        dest="progress",
+        help="Force enable progress reporting."
+    )
+    parser.add_argument(
+        "--no-progress",
+        action="store_false",
+        dest="progress",
+        help="Force disable progress reporting."
+    )
+    parser.set_defaults(progress=None)
     parser.add_argument("--debug", action="store_true", help="Enable debug logging.")
 
     args = parser.parse_args()
 
+    # Resolve source lazily (default to hostname)
+    if args.source is None:
+        args.source = socket.gethostname()
+
+    # Validate numeric arguments
+    if args.retries < 1:
+        print_error("[ERROR] --retries must be >= 1, got {}".format(args.retries))
+        sys.exit(2)
+    if args.max_age < 0:
+        print_error("[ERROR] --max-age must be >= 0, got {}".format(args.max_age))
+        sys.exit(2)
+    if args.max_size_kb < 0:
+        print_error("[ERROR] --max-size-kb must be >= 0, got {}".format(args.max_size_kb))
+        sys.exit(2)
+
+    # Install signal handlers
+    signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGTERM, signal_handler)
+
+    # Apply parsed args to module-level config
+    max_age = args.max_age
+    max_size = args.max_size_kb
+    dry_run = args.dry_run
+    retries = args.retries
+    sync_mode = args.sync
+
     if args.tls:
         schema = "https"
 
-    source = ""
+    # Determine progress mode
+    if args.progress is None:
+        show_progress = sys.stderr.isatty()
+    else:
+        show_progress = args.progress
+
+    source_query = "?source={}".format(quote(args.source))
+
+    api_path = "/api/check" if sync_mode else "/api/checkAsync"
+    # api_endpoint is the path+query for http.client (not full URL)
+    api_endpoint = "{}{}".format(api_path, source_query)
+    display_url = "{}://{}:{}{}".format(schema, args.server, args.port, api_endpoint)
+
+    sys.stderr.write("=" * 80 + "\n")
+    sys.stderr.write("   Python Thunderstorm Collector\n")
+    sys.stderr.write("   Florian Roth, Nextron Systems GmbH, 2024\n")
+    sys.stderr.write("\n")
+    sys.stderr.write("=" * 80 + "\n")
+    # Normalize existing hard_skips
+    hard_skips[:] = [os.path.normpath(p) for p in hard_skips]
+    hard_skips_set = set(hard_skips)
+    # Extend hard_skips with mount points of network/special filesystems
+    for mp in get_excluded_mounts():
+        norm_mp = os.path.normpath(mp)
+        if norm_mp not in hard_skips_set:
+            hard_skips.append(norm_mp)
+            hard_skips_set.add(norm_mp)
+
+    sys.stderr.write("Target Directory: {}\n".format(", ".join(args.dirs)))
+    sys.stderr.write("Thunderstorm Server: {}\n".format(args.server))
+    sys.stderr.write("Thunderstorm Port: {}\n".format(args.port))
+    sys.stderr.write("Using API Endpoint: {}\n".format(display_url))
+    sys.stderr.write("Maximum Age of Files: {} days\n".format(max_age))
+    sys.stderr.write("Maximum File Size: {} KB\n".format(max_size))
+    sys.stderr.write("Excluded directories: {}\n".format(", ".join(hard_skips[:10]) + (" ..." if len(hard_skips) > 10 else "")))
     if args.source:
-        source = f"?source={args.source}"
-
-    api_endpoint = "{}://{}:{}/api/checkAsync{}".format(schema, args.server, args.port, source)
-
-    print("=" * 80)
-    print("   Python Thunderstorm Collector")
-    print("   Florian Roth, Nextron Systems GmbH, 2024")
-    print()
-    print("=" * 80)
-    print("Target Directory: {}".format(", ".join(args.dirs)))
-    print("Thunderstorm Server: {}".format(args.server))
-    print("Thunderstorm Port: {}".format(args.port))
-    print("Using API Endpoint: {}".format(api_endpoint))
-    print("Maximum Age of Files: {}".format(max_age))
-    print("Maximum File Size: {} MB".format(max_size))
-    print("Excluded directories: {}".format(", ".join(hard_skips)))
-    print("Source Identifier: {}".format(args.source)) if args.source else None
-    print()
-
-    print("Starting the walk at: {} ...".format(", ".join(args.dirs)))
+        sys.stderr.write("Source Identifier: {}\n".format(args.source))
+    sys.stderr.write("\n")
+
+    # Validate --ca-cert if provided
+    if args.ca_cert and not os.path.isfile(args.ca_cert):
+        print_error("[ERROR] CA certificate file not found: {}".format(args.ca_cert))
+        sys.exit(2)
+
+    # Validate that all requested directories exist
+    valid_dirs = []
+    for d in args.dirs:
+        if not os.path.exists(d):
+            print_error("[ERROR] Directory does not exist: {}".format(d))
+        elif not os.path.isdir(d):
+            print_error("[ERROR] Path is not a directory: {}".format(d))
+        else:
+            valid_dirs.append(d)
+    if not valid_dirs:
+        print_error("[ERROR] No valid directories to scan.")
+        sys.exit(2)
+    if len(valid_dirs) < len(args.dirs):
+        print_error("[WARN] Some directories were invalid and will be skipped.")
+    args.dirs = valid_dirs
+
+    sys.stderr.write("Starting the walk at: {} ...\n".format(", ".join(args.dirs)))
+
+    # Count files for progress reporting
+    if show_progress:
+        sys.stderr.write("Counting files for progress reporting...\n")
+        total_files_estimate = count_files(args.dirs)
+        sys.stderr.write("Estimated files to check: {}\n".format(total_files_estimate))
+
+    # Send collection begin marker (with single retry on failure)
+    scan_id = collection_marker(
+        args.server, args.port, args.tls, args.insecure,
+        args.source, "0.1",
+        "begin",
+        ca_cert=args.ca_cert,
+        retry_on_fail=True
+    )
+    # scan_id: None = failure (fatal), "" = success but no id returned, non-empty = valid id
+    if scan_id is None:
+        print_error("[ERROR] Failed to send begin collection marker. Cannot reach Thunderstorm server.")
+        sys.exit(2)
+    if scan_id:
+        sys.stderr.write("[INFO] Collection scan_id: {}\n".format(scan_id))
+        # Append scan_id to api_endpoint
+        if "?" in api_endpoint:
+            api_endpoint = "{}&scan_id={}".format(api_endpoint, quote(scan_id))
+        else:
+            api_endpoint = "{}?scan_id={}".format(api_endpoint, quote(scan_id))
 
     # Walk directory
     for walkdir in args.dirs:
         process_dir(walkdir)
 
-    # End message
+    # Clear progress line if needed
+    if show_progress and total_files_estimate > 0:
+        sys.stderr.write("\r" + " " * 40 + "\r")
+        sys.stderr.flush()
+
+    # Send collection end marker with stats
     end_date = time.time()
-    minutes = int((end_date - current_date) / 60)
-    print(
-        "Thunderstorm Collector Run finished (Checked: {} Submitted: {} Minutes: {})".format(
-            num_processed, num_submitted, minutes
+    elapsed = int(end_date - current_date)
+    minutes = elapsed // 60
+    collection_marker(
+        args.server, args.port, args.tls, args.insecure,
+        args.source, "0.1",
+        "end",
+        scan_id=scan_id,
+        ca_cert=args.ca_cert,
+        stats={
+            "scanned": num_processed,
+            "submitted": num_submitted,
+            "failed": num_failed,
+            "elapsed_seconds": elapsed,
+        }
+    )
+
+    sys.stderr.write(
+        "Thunderstorm Collector Run finished (Checked: {} Submitted: {} Failed: {} Minutes: {})\n".format(
+            num_processed, num_submitted, num_failed, minutes
         )
     )
+
+    # Exit codes: 0 = success, 1 = partial failure (some uploads failed), 2 = fatal
+    if num_failed > 0:
+        sys.exit(1)
+    sys.exit(0)
diff --git a/scripts/thunderstorm-collector.sh b/scripts/thunderstorm-collector.sh
index 12cb36d..55c9798 100755
--- a/scripts/thunderstorm-collector.sh
+++ b/scripts/thunderstorm-collector.sh
@@ -1,177 +1,1162 @@
-#!/bin/bash
+#!/usr/bin/env bash
 #
 # THOR Thunderstorm Bash Collector
-# Florian Roth
-# September 2025
+# Florian Roth / Nextron Systems
+#
+# Goals:
+# - work on old and new Bash versions (Bash 3+)
+# - handle missing dependencies with fallbacks
+# - degrade gracefully on partial failures
 
-VERSION="0.3.0"
+VERSION="0.5.0"
 
-# Settings ------------------------------------------------------------
+# Defaults --------------------------------------------------------------------
 
-# Log
 LOGFILE="./thunderstorm.log"
 LOG_TO_FILE=1
-LOG_TO_SYSLOG=0 # Log to syslog is set to 'off' by default
+LOG_TO_SYSLOG=0
 LOG_TO_CMDLINE=1
+SYSLOG_FACILITY="user"
 
-# Thunderstorm Server
 THUNDERSTORM_SERVER="ygdrasil.nextron"
 THUNDERSTORM_PORT=8080
 USE_SSL=0
+INSECURE=0
+CA_CERT=""
 ASYNC_MODE=1
 
-# Source
-HOSTNAME=$(hostname -f)
-
-# Target selection 
-declare -a SCAN_FOLDERS=('/root' '/tmp' '/home' '/var' '/usr');  # folders to scan
 MAX_AGE=14
-MAX_FILE_SIZE=2000  # max file size to check in kilobyte, default 2 MB
+# Upper size limit for submitted files, in KB (overridable with --max-size-kb).
+MAX_FILE_SIZE_KB=2000
+DEBUG=0
+DRY_RUN=0
+# Upload attempts per file before submit_file gives up.
+RETRIES=3
+
+# Set to "curl" or "wget" by detect_upload_tool.
+UPLOAD_TOOL=""
+# Temp files registered during the run; removed by cleanup_tmp_files.
+declare -a TMP_FILES_ARR=()
+# Extra arguments spliced into every curl / wget invocation.
+declare -a CURL_EXTRA_OPTS=()
+declare -a WGET_EXTRA_OPTS=()
+
+# Keep defaults simple and stable for Bash 3+.
+SCAN_FOLDERS=('/root' '/tmp' '/home' '/var' '/usr')
+
+# Run counters; the FILES_* values are sent as stats with the "interrupted"
+# collection marker.
+FILES_SCANNED=0
+FILES_SUBMITTED=0
+FILES_SKIPPED=0
+FILES_FAILED=0
+TOTAL_FILES=0
+# Passed along with collection markers once known (empty until then).
+SCAN_ID=""
+
+PROGRESS_MODE=""  # auto (empty), "on", or "off"
+SHOW_PROGRESS=0
+
+SCRIPT_NAME="${0##*/}"
+# Start time in epoch seconds; 0 when `date +%s` does not work.
+START_TS="$(date +%s 2>/dev/null || echo 0)"
+# Source identifier; set via --source or filled in by detect_source_name.
+SOURCE_NAME=""
+
+# Filesystem exclusions -------------------------------------------------------
+# Pseudo-filesystems, virtual mounts, network shares, and cloud storage that
+# should never be walked. Pruned at the find level for efficiency.
 
-# Debug
-DEBUG=1
+# Hardcoded paths — always excluded
+EXCLUDE_PATHS=(
+    /proc /sys /dev /run
+    /sys/kernel/debug /sys/kernel/slab /sys/kernel/tracing /sys/devices
+    /snap /.snapshots
+)
 
-# Code ----------------------------------------------------------------
+# Network and special filesystem types — mount points with these types are
+# discovered from /proc/mounts and excluded automatically.
+# Both lists are space-separated; get_excluded_mounts matches a mount's
+# filesystem type against them as a whole word.
+NETWORK_FS_TYPES="nfs nfs4 cifs smbfs smb3 sshfs fuse.sshfs afp webdav davfs2 fuse.rclone fuse.s3fs"
+SPECIAL_FS_TYPES="proc procfs sysfs devtmpfs devpts cgroup cgroup2 pstore bpf tracefs debugfs securityfs hugetlbfs mqueue autofs fusectl rpc_pipefs nsfs configfs binfmt_misc selinuxfs efivarfs ramfs"
+
+# Cloud storage folder names — if any path segment matches (case-insensitive),
+# the directory is pruned. Keep names with embedded spaces separate so the
+# find-level pruning logic does not accidentally exclude generic names such as
+# "Drive" or "Google" on unrelated paths.
+# Separators: CLOUD_DIR_NAMES is space-separated; CLOUD_DIR_NAMES_SPACED and
+# CLOUD_DIR_PATTERNS are '|'-separated (is_cloud_path splits them with IFS='|').
+# Names must equal a whole path segment; patterns only need to start one.
+CLOUD_DIR_NAMES="OneDrive Dropbox .dropbox GoogleDrive iCloudDrive Nextcloud ownCloud MEGA MEGAsync Tresorit SyncThing"
+CLOUD_DIR_NAMES_SPACED="Google Drive|iCloud Drive"
+CLOUD_DIR_PATTERNS="OneDrive -|OneDrive-|Nextcloud-"
+
+# get_excluded_mounts: parse /proc/mounts and print the mount points of
+# network and special filesystem types, one per line.
+# /proc/mounts encodes space, tab, newline and backslash inside a mount point
+# as octal escapes (\040, \011, \012, \134); they are decoded with printf %b
+# so the printed paths match the real paths on disk.
+# Returns 0 and prints nothing when /proc/mounts is not readable.
+get_excluded_mounts() {
+    [ -r /proc/mounts ] || return 0
+    # Keep the loop variables out of the global namespace.
+    local _dev _mp _fstype _rest
+    while IFS=' ' read -r _dev _mp _fstype _rest; do
+        case " $NETWORK_FS_TYPES $SPECIAL_FS_TYPES " in
+            *" $_fstype "*) printf '%b\n' "$_mp" ;;
+        esac
+    done < /proc/mounts
+}
+
+# is_cloud_path: check if a path contains a known cloud storage folder name.
+# Args: $1=path to test (compared case-insensitively)
+# Returns 0 (true) if it matches, 1 (false) otherwise.
+# Matching rules:
+#   - entries of CLOUD_DIR_NAMES (space-separated) and CLOUD_DIR_NAMES_SPACED
+#     ('|'-separated) must equal a whole path segment
+#   - entries of CLOUD_DIR_PATTERNS ('|'-separated) only need to start a segment
+#   - any path through .../Library/CloudStorage (macOS) matches
+is_cloud_path() {
+    local path_lower names_lower spaced_lower patterns_lower name old_ifs
+
+    # Lower-case the path and each list once up front; converting every list
+    # entry separately costs one forked pipeline per name on every call.
+    path_lower="$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]')"
+    names_lower="$(printf '%s' "$CLOUD_DIR_NAMES" | tr '[:upper:]' '[:lower:]')"
+    spaced_lower="$(printf '%s' "$CLOUD_DIR_NAMES_SPACED" | tr '[:upper:]' '[:lower:]')"
+    patterns_lower="$(printf '%s' "$CLOUD_DIR_PATTERNS" | tr '[:upper:]' '[:lower:]')"
+
+    for name in $names_lower; do
+        case "$path_lower" in
+            *"/$name"/*|*"/$name") return 0 ;;
+        esac
+    done
+
+    # The remaining lists hold entries with embedded spaces: split on '|' only,
+    # and restore IFS on every exit path.
+    old_ifs="$IFS"
+    IFS='|'
+    for name in $spaced_lower; do
+        case "$path_lower" in
+            *"/$name"/*|*"/$name") IFS="$old_ifs"; return 0 ;;
+        esac
+    done
+    for name in $patterns_lower; do
+        case "$path_lower" in
+            *"/$name"*) IFS="$old_ifs"; return 0 ;;
+        esac
+    done
+    IFS="$old_ifs"
+
+    # macOS: ~/Library/CloudStorage
+    case "$path_lower" in
+        */library/cloudstorage/*|*/library/cloudstorage) return 0 ;;
+    esac
+    return 1
+}
+
+# Helpers ---------------------------------------------------------------------
+
+# timestamp: print the current time as YYYY-MM-DD_HH:MM:SS; if date rejects
+# the format string, print date's default output instead.
+timestamp() {
+    if ! date "+%Y-%m-%d_%H:%M:%S" 2>/dev/null; then
+        date
+    fi
+}
+
+# cleanup_tmp_files: delete every file registered in TMP_FILES_ARR and the
+# per-process fallback directory used by mktemp_portable.
+cleanup_tmp_files() {
+    local tmp_path
+    for tmp_path in "${TMP_FILES_ARR[@]}"; do
+        if [ -n "$tmp_path" ] && [ -f "$tmp_path" ]; then
+            rm -f "$tmp_path"
+        fi
+    done
+    # Fallback temp directory (only exists if mktemp_portable had to create it)
+    local fallback_dir="${TMPDIR:-/tmp}/thunderstorm.$$"
+    [ -d "$fallback_dir" ] && rm -rf "$fallback_dir"
+}
+
+INTERRUPTED=0
+
+# send_interrupted_marker: report an aborted run to the server by posting an
+# "interrupted" collection marker that carries the counters gathered so far.
+# Does nothing in dry-run mode or when no server is configured.
+send_interrupted_marker() {
+    [ "$DRY_RUN" -eq 0 ] || return 0
+    [ -n "$THUNDERSTORM_SERVER" ] || return 0
+
+    # Elapsed seconds since START_TS; stays 0 when START_TS is unusable.
+    local _now
+    local _secs=0
+    _now="$(date +%s 2>/dev/null || echo "$START_TS")"
+    if [ "$START_TS" -gt 0 ] 2>/dev/null; then
+        _secs=$(( _now - START_TS ))
+        if [ "$_secs" -lt 0 ]; then
+            _secs=0
+        fi
+    fi
+
+    # Stats fragment appended verbatim to the marker's JSON body.
+    local _stats='"stats":{'
+    _stats="${_stats}\"scanned\":${FILES_SCANNED}"
+    _stats="${_stats},\"submitted\":${FILES_SUBMITTED}"
+    _stats="${_stats},\"skipped\":${FILES_SKIPPED}"
+    _stats="${_stats},\"failed\":${FILES_FAILED}"
+    _stats="${_stats},\"elapsed_seconds\":${_secs}}"
+
+    local _proto
+    if [ "$USE_SSL" -eq 1 ]; then
+        _proto="https"
+    else
+        _proto="http"
+    fi
+    local _url="${_proto}://${THUNDERSTORM_SERVER}:${THUNDERSTORM_PORT}"
+    _url="${_url%/}"
+
+    collection_marker "$_url" "interrupted" "${SCAN_ID:-}" "$_stats" >/dev/null 2>&1
+}
+
+# on_signal: INT/TERM handler. Logs a warning, posts the "interrupted"
+# collection marker, removes temp files and exits with status 1.
+on_signal() {
+    # Prevent recursive signal handling
+    trap '' INT TERM
+    # on_exit (EXIT trap) checks this flag and skips its own cleanup, since
+    # cleanup is performed right here.
+    INTERRUPTED=1
+    log_msg warn "Received signal, sending interrupted marker and exiting..."
+    send_interrupted_marker
+    cleanup_tmp_files
+    # Exit 1 for partial failure (interrupted collection)
+    exit 1
+}
 
-function timestamp {
-  date +%F_%T
+# on_exit: EXIT handler. Removes temp files unless on_signal already did so
+# (signalled by INTERRUPTED=1).
+on_exit() {
+    [ "$INTERRUPTED" -eq 0 ] && cleanup_tmp_files
 }
 
-function log {
-    local type="$1"
-    local message="$2"
+# Register the handlers: cleanup on every exit, marker + cleanup on INT/TERM.
+trap on_exit EXIT
+trap on_signal INT TERM
+
+# log_msg: write one log record to the enabled sinks (file, syslog, terminal).
+# Args: $1=level (debug|info|warn|error; anything else is logged to syslog as
+#       info)  $2..=message words, joined with spaces
+# Debug records are dropped unless DEBUG=1. CR/LF inside the message are
+# replaced with spaces so every record stays on one line.
+log_msg() {
+    local level="$1"
+    shift
+    local message="$*"
     local ts
-    ts=$(timestamp)
+    local logger_prio
+    local clean
+
+    [ "$level" = "debug" ] && [ "$DEBUG" -ne 1 ] && return 0
+
+    ts="$(timestamp)"
+    clean="$message"
+    clean="${clean//$'\r'/ }"
+    clean="${clean//$'\n'/ }"
+
+    if [ "$LOG_TO_FILE" -eq 1 ]; then
+        # A failed write disables file logging for the rest of the run so the
+        # warning below is printed only once.
+        if ! printf "%s %s %s\n" "$ts" "$level" "$clean" >> "$LOGFILE" 2>/dev/null; then
+            LOG_TO_FILE=0
+            printf "%s warn Could not write to log file '%s'; disabling file logging\n" "$ts" "$LOGFILE" >&2
+        fi
+    fi
+
+    if [ "$LOG_TO_SYSLOG" -eq 1 ] && command -v logger >/dev/null 2>&1; then
+        # Map the script's level names to syslog priority names.
+        case "$level" in
+            error) logger_prio="err" ;;
+            warn) logger_prio="warning" ;;
+            debug) logger_prio="debug" ;;
+            *) logger_prio="info" ;;
+        esac
+        logger -p "${SYSLOG_FACILITY}.${logger_prio}" "${SCRIPT_NAME}: ${clean}" >/dev/null 2>&1 || true
+    fi
+
+    if [ "$LOG_TO_CMDLINE" -eq 1 ]; then
+        # Clear progress line before printing log messages to avoid interleaving
+        if [ "$SHOW_PROGRESS" -eq 1 ]; then
+            printf '\r\033[K' >&2
+        fi
+        # error/warn go to stderr, everything else to stdout.
+        case "$level" in
+            error|warn)
+                printf "[%s] %s\n" "$level" "$clean" >&2
+                ;;
+            *)
+                printf "[%s] %s\n" "$level" "$clean"
+                ;;
+        esac
+    fi
+}
+
+# die: log all arguments as one error message and terminate the script with
+# exit status 2.
+die() {
+    log_msg error "$*"
+    exit 2
+}
+
+# print_banner: print the ASCII-art banner with the current VERSION to stdout.
+# The here-document is unquoted so ${VERSION} expands; that is also why every
+# backslash of the artwork is written as "\\".
+print_banner() {
+    cat <<EOF
+==============================================================
+    ________                __            __
+   /_  __/ /  __ _____  ___/ /__ _______ / /____  ______ _
+    / / / _ \\/ // / _ \\/ _  / -_) __(_-</ __/ _ \\/ __/  ' \\
+   /_/ /_//_/\\_,_/_//_/\\_,_/\\__/_/ /___/\\__/\\___/_/ /_/_/_/
+   v${VERSION}
+
+   THOR Thunderstorm Collector for Linux/Unix
+==============================================================
+EOF
+}
+
+# print_help: print the usage text to stdout. The here-document is quoted
+# ('EOF'), so the text is emitted verbatim without any expansion.
+print_help() {
+    cat <<'EOF'
+Usage:
+  thunderstorm-collector.sh [options]
+
+Options:
+  -s, --server <host>        Thunderstorm server hostname or IP
+  -p, --port <port>          Thunderstorm port (default: 8080)
+  -d, --dir <path>           Directory to scan (repeatable)
+  --max-age <days>           Max file age in days (default: 14)
+  --max-size-kb <kb>         Max file size in KB (default: 2000)
+  --source <name>            Source identifier (default: hostname)
+  --ssl                      Use HTTPS
+  -k, --insecure             Skip TLS certificate verification
+  --ca-cert <path>           Path to custom CA certificate bundle for TLS
+  --sync                     Use /api/check (default: /api/checkAsync)
+  --retries <num>            Retry attempts per file (default: 3)
+  --dry-run                  Do not upload or contact the server; only show what would be submitted
+  --progress                 Force progress reporting
+  --no-progress              Disable progress reporting
+  --debug                    Enable debug log messages
+  --log-file <path>          Log file path (default: ./thunderstorm.log)
+  --no-log-file              Disable file logging
+  --syslog                   Enable syslog logging
+  --quiet                    Disable command-line logging
+  -h, --help                 Show this help text
+
+Examples:
+  bash thunderstorm-collector.sh --server thunderstorm.local
+  bash thunderstorm-collector.sh --server 10.0.0.5 --ssl --dir "/tmp/My Files" --dry-run
+EOF
+}
+
+# is_integer: succeed when $1 is a non-empty string of decimal digits only
+# (no sign, no whitespace).
+is_integer() {
+    [ -n "$1" ] || return 1
+    case "$1" in
+        *[!0-9]*) return 1 ;;
+    esac
+    return 0
+}
+
+# is_positive_integer: succeed when $1 consists of decimal digits only and is
+# numerically greater than zero.
+is_positive_integer() {
+    case "$1" in
+        ''|*[!0-9]*) return 1 ;;
+    esac
+    if [ "$1" -gt 0 ] 2>/dev/null; then
+        return 0
+    fi
+    return 1
+}
+
+# detect_source_name: fill SOURCE_NAME when it is still empty.
+# Tries `hostname -f`, then `hostname`, then `uname -n`, and finally falls
+# back to the literal "unknown-host". A value that is already set (e.g. via
+# --source) is left untouched. Always returns 0.
+detect_source_name() {
+    [ -n "$SOURCE_NAME" ] && return 0
+    if command -v hostname >/dev/null 2>&1; then
+        SOURCE_NAME="$(hostname -f 2>/dev/null)"
+        [ -z "$SOURCE_NAME" ] && SOURCE_NAME="$(hostname 2>/dev/null)"
+    fi
+    [ -z "$SOURCE_NAME" ] && SOURCE_NAME="$(uname -n 2>/dev/null)"
+    [ -z "$SOURCE_NAME" ] && SOURCE_NAME="unknown-host"
+    # Without an explicit status the function would return the result of the
+    # `[ -z ... ]` test above, i.e. 1 whenever a name had already been found.
+    return 0
+}
 
-    # Only report debug messages if mode is enabled
-    if [ "$type" == "debug" ] && [ $DEBUG -ne 1 ]; then
+# build_query_source: print "?source=<url-encoded $1>", or nothing at all
+# when $1 is empty.
+build_query_source() {
+    [ -n "$1" ] || return 0
+    printf "?source=%s" "$(urlencode "$1")"
+}
+
+# urlencode: percent-encode $1 for use in a URL query string.
+# Only the RFC 3986 unreserved characters (ASCII letters, digits, ".", "~",
+# "_", "-") are passed through; every other character is emitted as %XX for
+# each of its bytes (upper-case hex). Prints the result without a newline.
+urlencode() {
+    local input="$1"
+    local out=""
+    local i ch hex_bytes
+
+    for ((i = 0; i < ${#input}; i++)); do
+        ch="${input:i:1}"
+        case "$ch" in
+            # The safe set is spelled out instead of using a-z/A-Z ranges:
+            # ranges follow the locale's collation on older Bash versions and
+            # can match non-ASCII letters, which would then go out unencoded.
+            [abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.~_-])
+                out="${out}${ch}"
+                ;;
+            *)
+                # Hex dump of all bytes of this character (a multi-byte UTF-8
+                # character yields several pairs), upper-cased in one pass.
+                hex_bytes="$(printf '%s' "$ch" | od -An -tx1 | tr -d ' \n' | tr 'abcdef' 'ABCDEF')"
+                while [ -n "$hex_bytes" ]; do
+                    out="${out}%${hex_bytes:0:2}"
+                    hex_bytes="${hex_bytes:2}"
+                done
+                ;;
+        esac
+    done
+    printf "%s" "$out"
+}
+
+# sanitize_filename_for_multipart: make $1 safe to embed in a multipart
+# filename attribute. Double quote, semicolon, backslash, CR and LF are each
+# replaced with "_"; an empty input yields "sample.bin".
+sanitize_filename_for_multipart() {
+    local cleaned=$1
+    local unsafe
+    for unsafe in '"' ';' '\' $'\r' $'\n'; do
+        # Quoting the pattern makes the character match literally.
+        cleaned=${cleaned//"$unsafe"/_}
+    done
+    printf "%s" "${cleaned:-sample.bin}"
+}
+
+# file_size_kb: print the size of file $1 in KB, rounded up.
+# Prints -1 and returns 1 when the size cannot be determined (missing or
+# unreadable file); nothing is written to stderr in that case.
+file_size_kb() {
+    # Use wc for portability across GNU/BSD and older systems.
+    local bytes
+    # The braces put the input redirection itself under 2>/dev/null; written
+    # as `wc -c < "$1" 2>/dev/null` the shell reports a failed open before the
+    # stderr redirection is in effect.
+    bytes="$( { wc -c < "$1"; } 2>/dev/null )"
+    # Intentionally split on whitespace to normalize wc output ("   123\n" -> "123").
+    # shellcheck disable=SC2086
+    set -- $bytes
+    bytes="$1"
+    case "$bytes" in
+        ''|*[!0-9]*) echo -1; return 1 ;;
+    esac
+    echo $(( (bytes + 1023) / 1024 ))
+}
+
+# mktemp_portable: create an empty temporary file and print its path.
+# Uses mktemp when it works; otherwise creates a file inside a private
+# per-process directory ${TMPDIR:-/tmp}/thunderstorm.<pid>.
+# Returns 1 (printing nothing) when no file could be created safely.
+mktemp_portable() {
+    local t
+    t="$(mktemp "${TMPDIR:-/tmp}/thunderstorm.XXXXXX" 2>/dev/null)"
+    if [ -n "$t" ] && [ -f "$t" ]; then
+        echo "$t"
         return 0
     fi
+    # Fallback: create a private directory first (mkdir is atomic), then a file inside it.
+    # This avoids the TOCTOU race of creating a predictable file in a shared /tmp.
+    local _dir="${TMPDIR:-/tmp}/thunderstorm.$$"
+    if [ -L "$_dir" ] || [ ! -d "$_dir" ]; then
+        # mkdir fails if anything, including a symlink, already has this name,
+        # so a planted link is rejected instead of being followed.
+        ( umask 077 && mkdir "$_dir" ) 2>/dev/null || return 1
+    elif [ ! -O "$_dir" ]; then
+        # The name is predictable (it contains the PID): refuse a directory
+        # that some other user created ahead of us.
+        return 1
+    fi
+    t="$_dir/${RANDOM:-0}.$(date +%N 2>/dev/null || echo 0)"
+    : > "$t" 2>/dev/null || return 1
+    echo "$t"
+}
 
-    # Exclude certain strings (false positives)
-    for ex_string in "${EXCLUDE_STRINGS[@]}";
-    do
-        # echo "Checking if $ex_string is in $message"
-        if [ "${message/$ex_string}" != "$message" ]; then
-            return 0
+# detect_upload_tool: set UPLOAD_TOOL to the first available uploader,
+# preferring curl over wget. Returns 1 (leaving UPLOAD_TOOL untouched) when
+# neither command is found.
+detect_upload_tool() {
+    local candidate
+    for candidate in curl wget; do
+        if command -v "$candidate" >/dev/null 2>&1; then
+            UPLOAD_TOOL="$candidate"
+            return 0
+        fi
+    done
+    return 1
+}
+
+# upload_with_curl: POST one file to the server as multipart/form-data.
+# Args: $1=endpoint URL  $2=local file path  $3=filename reported to the server
+# Returns:
+#   0   curl succeeded and the HTTP status was below 400 (or not detectable)
+#   91  a temporary file could not be created
+#   92  server answered with an HTTP status >= 400 other than 503
+#   93  server answered 503; a Retry-After delay (capped at 120s), if one was
+#       sent, has already been slept here
+#   *   curl's own non-zero exit code on transport failure
+# The three scratch files are deleted before returning so that a long run does
+# not pile up three files per upload attempt in the temp directory.
+upload_with_curl() {
+    local endpoint="$1"
+    local filepath="$2"
+    local filename="$3"
+    local safe_filename
+    local resp_file
+    local header_file
+    local err_file
+    local code
+    local http_code
+    local retry_after=""
+    local body=""
+
+    safe_filename="$(sanitize_filename_for_multipart "$filename")"
+
+    resp_file="$(mktemp_portable)" || return 91
+    TMP_FILES_ARR+=("$resp_file")
+    header_file="$(mktemp_portable)" || { rm -f "$resp_file"; return 91; }
+    TMP_FILES_ARR+=("$header_file")
+    err_file="$(mktemp_portable)" || { rm -f "$resp_file" "$header_file"; return 91; }
+    TMP_FILES_ARR+=("$err_file")
+
+    # Build the -F argument. curl splits the value on ';' and ',', so a path or
+    # filename containing either must be wrapped in double quotes, with any
+    # backslash and double quote inside escaped by a backslash. Ordinary paths
+    # are passed exactly as before.
+    local form_path="$filepath"
+    case "$form_path" in
+        *[\;,]*|\"*)
+            form_path="${form_path//\\/\\\\}"
+            form_path="${form_path//\"/\\\"}"
+            form_path="\"${form_path}\""
+            ;;
+    esac
+    # safe_filename no longer contains '"', ';' or '\', so plain wrapping is
+    # enough to protect a comma.
+    local form_name="$safe_filename"
+    case "$form_name" in
+        *,*) form_name="\"${form_name}\"" ;;
+    esac
+    local form_arg="file=@${form_path};filename=${form_name}"
+
+    curl -sS --show-error -X POST "${CURL_EXTRA_OPTS[@]}" \
+        --max-time 300 \
+        -D "$header_file" \
+        "$endpoint" \
+        -F "$form_arg" \
+        > "$resp_file" 2>"$err_file"
+    code=$?
+
+    if [ $code -ne 0 ]; then
+        local _curl_err
+        _curl_err="$(cat "$err_file" 2>/dev/null)"
+        [ -n "$_curl_err" ] && log_msg debug "curl error: $_curl_err"
+    fi
+
+    # Extract HTTP status code from headers (last status line wins, e.g. after
+    # an interim "100 Continue").
+    http_code="$(grep -oE 'HTTP/[0-9.]+ [0-9]+' "$header_file" 2>/dev/null | tail -1 | grep -oE '[0-9]+$')"
+
+    # Read everything still needed from the scratch files, then delete them.
+    if [ "$http_code" = "503" ]; then
+        retry_after="$(grep -i '^Retry-After:' "$header_file" 2>/dev/null | head -1 | sed 's/[^0-9]//g')"
+    elif [ $code -eq 0 ] && [ -n "$http_code" ] && [ "$http_code" -ge 400 ] 2>/dev/null; then
+        body="$(cat "$resp_file" 2>/dev/null)"
+        body="${body//$'\r'/ }"
+        body="${body//$'\n'/ }"
+    fi
+    rm -f "$resp_file" "$header_file" "$err_file"
+
+    # Handle 503 back-pressure
+    if [ "$http_code" = "503" ]; then
+        if [ -n "$retry_after" ] && [ "$retry_after" -gt 0 ] 2>/dev/null; then
+            [ "$retry_after" -gt 120 ] && retry_after=120
+            log_msg warn "Server returned 503, waiting ${retry_after}s (Retry-After)"
+            sleep "$retry_after"
+        fi
+        return 93
+    fi
+
+    if [ $code -ne 0 ]; then
+        return $code
+    fi
+
+    # Check for HTTP error status (>= 400)
+    if [ -n "$http_code" ] && [ "$http_code" -ge 400 ] 2>/dev/null; then
+        log_msg error "Server returned HTTP $http_code for '$filepath': $body"
+        return 92
+    fi
+
+    return 0
+}
+
+# upload_with_wget: POST one file as multipart/form-data using wget.
+# Args: $1=endpoint URL  $2=local file path  $3=filename reported to the server
+# Returns:
+#   0   wget succeeded and the HTTP status was below 400 (or not detectable)
+#   93  server answered 503; a Retry-After delay (capped at 120s), if one was
+#       sent, has already been slept here. Reserved for this case only, since
+#       submit_file treats 93 as back-pressure.
+#   94  a temporary file could not be created
+#   95  the multipart body could not be written
+#   96  HTTP status >= 400 although wget itself exited 0
+#   *   wget's own non-zero exit code
+# All scratch files (including the body file, which is a full copy of the
+# sample) are deleted before returning.
+upload_with_wget() {
+    # Portable multipart fallback for systems without curl.
+    local endpoint="$1"
+    local filepath="$2"
+    local filename="$3"
+    local safe_filename
+    local boundary
+    local body_file
+    local resp_file
+    local header_file
+    local code
+
+    safe_filename="$(sanitize_filename_for_multipart "$filename")"
+
+    # Generate a boundary that does not appear in the file content or metadata.
+    # Retry with different random seeds to avoid multipart corruption.
+    local _boundary_attempts=0
+    boundary="----ThunderstormBoundary${$}${RANDOM}${RANDOM}$(date +%s%N 2>/dev/null || echo 0)"
+    while [ "$_boundary_attempts" -lt 10 ]; do
+        # -e is required: the boundary starts with dashes and would otherwise
+        # be parsed as a grep option, making this check a no-op.
+        if ! LC_ALL=C grep -qF -e "$boundary" "$filepath" 2>/dev/null; then
+            # Also check it doesn't appear in metadata fields
+            case "${SOURCE_NAME}${filepath}" in
+                *"$boundary"*) ;;
+                *) break ;;
+            esac
         fi
+        _boundary_attempts=$((_boundary_attempts + 1))
+        boundary="----ThunderstormBoundary${$}${RANDOM}${RANDOM}${_boundary_attempts}$(date +%s%N 2>/dev/null || echo 0)"
     done
+    if [ "$_boundary_attempts" -ge 10 ]; then
+        log_msg warn "Could not find safe multipart boundary for '$filepath', upload may be malformed"
+    fi
+    body_file="$(mktemp_portable)" || return 94
+    TMP_FILES_ARR+=("$body_file")
+    resp_file="$(mktemp_portable)" || { rm -f "$body_file"; return 94; }
+    TMP_FILES_ARR+=("$resp_file")
+    header_file="$(mktemp_portable)" || { rm -f "$body_file" "$resp_file"; return 94; }
+    TMP_FILES_ARR+=("$header_file")
+
+    {
+        printf -- "--%s\r\n" "$boundary"
+        printf 'Content-Disposition: form-data; name="file"; filename="%s"\r\n' "$safe_filename"
+        printf 'Content-Type: application/octet-stream\r\n\r\n'
+        cat "$filepath"
+        printf '\r\n--%s--\r\n' "$boundary"
+    } > "$body_file" 2>/dev/null || { rm -f "$body_file" "$resp_file" "$header_file"; return 95; }
+
+    wget -S -O "$resp_file" "${WGET_EXTRA_OPTS[@]}" \
+        --timeout=300 \
+        --header="Content-Type: multipart/form-data; boundary=${boundary}" \
+        --post-file="$body_file" \
+        "$endpoint" 2>"$header_file"
+    code=$?
+    # The request has been sent (or has failed); the body copy is not needed
+    # any more.
+    rm -f "$body_file"
+
+    # Extract HTTP status code from headers (wget -S writes headers to stderr with leading spaces)
+    local http_code
+    local retry_after=""
+    local body=""
+    http_code="$(grep -oE 'HTTP/[0-9.]+[[:space:]]+[0-9]+' "$header_file" 2>/dev/null | tail -1 | grep -oE '[0-9]+$')"
+
+    # Read everything still needed from the scratch files, then delete them.
+    if [ "$http_code" = "503" ]; then
+        retry_after="$(grep -i 'Retry-After' "$header_file" 2>/dev/null | head -1 | sed 's/[^0-9]//g')"
+    elif [ $code -eq 0 ] && [ -n "$http_code" ] && [ "$http_code" -ge 400 ] 2>/dev/null; then
+        body="$(tr '\r\n' '  ' < "$resp_file" 2>/dev/null)"
+    fi
+    rm -f "$resp_file" "$header_file"
+
+    # Handle 503 back-pressure
+    if [ "$http_code" = "503" ]; then
+        if [ -n "$retry_after" ] && [ "$retry_after" -gt 0 ] 2>/dev/null; then
+            [ "$retry_after" -gt 120 ] && retry_after=120
+            log_msg warn "Server returned 503, waiting ${retry_after}s (Retry-After)"
+            sleep "$retry_after"
+        fi
+        return 93
+    fi
+
+    if [ $code -ne 0 ]; then
+        return $code
+    fi
+
+    # Check for HTTP error status (>= 400)
+    if [ -n "$http_code" ] && [ "$http_code" -ge 400 ] 2>/dev/null; then
+        log_msg error "Server returned HTTP $http_code for '$filepath': $body"
+        return 96
+    fi
+
+    return 0
+}
+
+# json_escape -- escape a string for embedding inside a JSON string literal
+# Args: $1=raw string
+# Outputs: the escaped string on stdout (without surrounding quotes)
+# Backslash, double quote, newline, CR, tab, backspace and form feed become
+# their JSON escape sequences; all other control characters are deleted.
+json_escape() {
+    local s="$1"
+    # Order matters: escape backslashes first, then other special chars
+    s="${s//\\/\\\\}"
+    s="${s//\"/\\\"}"
+    s="${s//$'\n'/\\n}"
+    s="${s//$'\r'/\\r}"
+    s="${s//$'\t'/\\t}"
+    s="${s//$'\010'/\\b}"   # backspace
+    s="${s//$'\014'/\\f}"   # form feed
+    # Remove remaining control characters (0x00-0x1f) that could break JSON
+    s="$(printf '%s' "$s" | tr -d '\000-\007\013\016-\037')"
+    printf '%s' "$s"
+}
+
+# collection_marker -- POST a collection marker to /api/collection
+# Args: $1=base_url  $2=type(begin|end|interrupted)  $3=scan_id(optional)  $4=stats_json(optional)
+#       $4 is a ready-made JSON fragment ("stats":{...}) appended verbatim to the body.
+# Outputs: scan_id extracted from response on stdout (empty if unsupported or failed)
+# Returns: 0 on success (also when the server answers 404/501, i.e. does not
+#          implement the endpoint), non-zero on failure
+# Only the "begin" marker is retried (once, after 2 seconds).
+collection_marker() {
+    local base_url="$1"
+    local marker_type="$2"
+    local scan_id="${3:-}"
+    local stats_json="${4:-}"
+    local marker_url="${base_url}/api/collection"
+    local body scan_id_out resp_file header_file
 
-    # Remove line breaks
-    message=$(echo "$message" | tr -d '\r' | tr '\n' ' ') 
+    resp_file="$(mktemp_portable)" || return 1
+    TMP_FILES_ARR+=("$resp_file")
+    header_file="$(mktemp_portable)" || return 1
+    TMP_FILES_ARR+=("$header_file")
 
-    # Remove prefix (e.g. [+])
-    if [[ "${message:0:1}" == "[" ]]; then
-        message_cleaned="${message:4:${#message}}"
+    # Build JSON body with proper escaping
+    local safe_source safe_scan_id
+    safe_source="$(json_escape "$SOURCE_NAME")"
+    safe_scan_id="$(json_escape "$scan_id")"
+
+    local safe_marker_type
+    safe_marker_type="$(json_escape "$marker_type")"
+    body="{\"type\":\"${safe_marker_type}\""
+    body="${body},\"source\":\"${safe_source}\""
+    body="${body},\"collector\":\"bash/${VERSION}\""
+    body="${body},\"timestamp\":\"$(date -u +"%Y-%m-%dT%H:%M:%SZ" 2>/dev/null || date -u)\""
+    [ -n "$scan_id"    ] && body="${body},\"scan_id\":\"${safe_scan_id}\""
+    [ -n "$stats_json" ] && body="${body},${stats_json}"
+    body="${body}}"
+
+    local _marker_rc=1
+    local _marker_attempts=1
+    [ "$marker_type" = "begin" ] && _marker_attempts=2
+
+    local _http_code
+    local _attempt=0
+    while [ "$_attempt" -lt "$_marker_attempts" ]; do
+        _attempt=$((_attempt + 1))
+        _marker_rc=1
+        # Start every attempt with an empty header file so a status line from
+        # a previous attempt cannot be picked up below.
+        : > "$header_file"
+        # Attempt POST — capture HTTP status to detect server-side errors
+        if command -v curl >/dev/null 2>&1; then
+            curl -sS -D "$header_file" -o "$resp_file" "${CURL_EXTRA_OPTS[@]}" \
+                -H "Content-Type: application/json" \
+                -d "$body" \
+                --max-time 10 \
+                "$marker_url" 2>/dev/null
+            _marker_rc=$?
+        elif command -v wget >/dev/null 2>&1; then
+            wget -S -O "$resp_file" "${WGET_EXTRA_OPTS[@]}" \
+                --header "Content-Type: application/json" \
+                --post-data "$body" \
+                --timeout=10 \
+                "$marker_url" 2>"$header_file"
+            _marker_rc=$?
+        fi
+        # If transport succeeded, validate HTTP status code.
+        # 404/501 means the server doesn't implement marker endpoint; continue without scan_id.
+        if [ "$_marker_rc" -eq 0 ]; then
+            _http_code="$(grep -oE 'HTTP/[0-9.]+[[:space:]]+[0-9]+' "$header_file" 2>/dev/null | tail -1 | grep -oE '[0-9]+$')"
+            if [ -n "$_http_code" ] && [ "$_http_code" -ge 400 ] 2>/dev/null; then
+                if [ "$_http_code" = "404" ] || [ "$_http_code" = "501" ]; then
+                    log_msg warn "Collection marker '$marker_type' not supported (HTTP $_http_code) — server does not implement /api/collection"
+                    _marker_rc=0
+                else
+                    log_msg warn "Collection marker '$marker_type' received HTTP $_http_code"
+                    _marker_rc=1
+                fi
+            fi
+        fi
+        if [ "$_marker_rc" -eq 0 ]; then
+            break
+        fi
+        if [ "$_attempt" -lt "$_marker_attempts" ]; then
+            log_msg warn "Begin marker failed (attempt $_attempt/$_marker_attempts), retrying in 2s..."
+            sleep 2
+        fi
+    done
+
+    # Extract scan_id from response, handling JSON escapes (e.g. \" and \\ inside the value).
+    # Uses awk to find the "scan_id" key and parse the JSON string value properly.
+    # The response file is parsed regardless of the status computed above.
+    scan_id_out="$(awk '
+    BEGIN { found = 0 }
+    {
+        s = s $0
+    }
+    END {
+        # Find "scan_id" key
+        idx = index(s, "\"scan_id\"")
+        if (idx == 0) exit
+        rest = substr(s, idx + length("\"scan_id\""))
+        # Skip whitespace and colon
+        gsub(/^[[:space:]]*:[[:space:]]*/, "", rest)
+        # Must start with quote
+        if (substr(rest, 1, 1) != "\"") exit
+        rest = substr(rest, 2)
+        val = ""
+        while (length(rest) > 0) {
+            c = substr(rest, 1, 1)
+            if (c == "\\") {
+                # Escaped character
+                nc = substr(rest, 2, 1)
+                if (nc == "\"") { val = val "\""; rest = substr(rest, 3) }
+                else if (nc == "\\") { val = val "\\"; rest = substr(rest, 3) }
+                else if (nc == "n") { val = val "\n"; rest = substr(rest, 3) }
+                else if (nc == "r") { val = val "\r"; rest = substr(rest, 3) }
+                else if (nc == "t") { val = val "\t"; rest = substr(rest, 3) }
+                else if (nc == "/") { val = val "/"; rest = substr(rest, 3) }
+                else if (nc == "b") { val = val "\b"; rest = substr(rest, 3) }
+                else if (nc == "f") { val = val "\f"; rest = substr(rest, 3) }
+                else if (nc == "u") {
+                    # \uXXXX unicode escape
+                    hex = substr(rest, 3, 4)
+                    rest = substr(rest, 7)
+                    if (length(hex) == 4) {
+                        # Convert hex to decimal
+                        cp = 0
+                        for (hi = 1; hi <= 4; hi++) {
+                            hc = substr(hex, hi, 1)
+                            if (hc >= "0" && hc <= "9") cp = cp * 16 + (hc + 0)
+                            else if (hc == "a" || hc == "A") cp = cp * 16 + 10
+                            else if (hc == "b" || hc == "B") cp = cp * 16 + 11
+                            else if (hc == "c" || hc == "C") cp = cp * 16 + 12
+                            else if (hc == "d" || hc == "D") cp = cp * 16 + 13
+                            else if (hc == "e" || hc == "E") cp = cp * 16 + 14
+                            else if (hc == "f" || hc == "F") cp = cp * 16 + 15
+                            else { cp = -1; break }
+                        }
+                        if (cp >= 32 && cp <= 126) {
+                            val = val sprintf("%c", cp)
+                        } else if (cp >= 0) {
+                            # Non-ASCII or control char: replace with underscore
+                            val = val "_"
+                        }
+                        # cp == -1: invalid hex, skip silently
+                    }
+                }
+                else { val = val nc; rest = substr(rest, 3) }
+            } else if (c == "\"") {
+                break
+            } else {
+                val = val c
+                rest = substr(rest, 2)
+            }
+        }
+        printf "%s", val
+    }' "$resp_file" 2>/dev/null)"
+
+    # Validate scan_id: reject empty values, control characters, and unreasonably long values.
+    # The value is JSON-escaped for markers and URL-encoded for query parameters, so we only
+    # need to guard against control characters and excessive length.
+    if [ ${#scan_id_out} -gt 256 ]; then
+        scan_id_out=""
     else
-        message_cleaned="$message"
-    fi
-
-    # Log to file
-    if [[ $LOG_TO_FILE -eq 1 ]]; then
-        echo "$ts $type $message_cleaned" >> "$LOGFILE"
-    fi
-    # Log to syslog
-    if [[ $LOG_TO_SYSLOG -eq 1 ]]; then
-        logger -p "$SYSLOG_FACILITY.$type" "$(basename "$0"): $message_cleaned"
-    fi
-    # Log to command line
-    if [[ $LOG_TO_CMDLINE -eq 1 ]]; then
-        echo "$message" >&2
-    fi
-}
-
-function check_req 
-{
-    curl_avail=$(command -v curl)
-    if [[ -z $curl_avail ]]; then 
-        log error "The 'curl' command can't be found but is needed"
-        exit 1
-    fi
-}
-
-# Program -------------------------------------------------------------
-
-echo "=============================================================="
-echo "    ________                __            __                "
-echo "   /_  __/ /  __ _____  ___/ /__ _______ / /____  ______ _  "
-echo "    / / / _ \/ // / _ \/ _  / -_) __(_-</ __/ _ \/ __/  ' \ "
-echo "   /_/ /_//_/\_,_/_//_/\_,_/\__/_/ /___/\__/\___/_/ /_/_/_/ "
-echo "   v$VERSION"
-echo " "
-echo "   THOR Thunderstorm Collector for Linux/Unix"
-echo "   Florian Roth, September 2020"
-echo "=============================================================="
-
-# Root check
-if [ "$(id -u)" != "0" ]; then
-   log error "This script should be run as root to have access to all files on disk" 1>&2
-fi
-
-echo "Writing log file to $LOGFILE ..."
-
-log info "Started Thunderstorm Collector - Version $VERSION"
-log info "Transmitting samples to $THUNDERSTORM_SERVER"
-log info "Processing folders ${SCAN_FOLDERS[*]}"
-log info "Only check files created / modified within $MAX_AGE days"
-log info "Only process files smaller $MAX_FILE_SIZE KB"
-
-# Check requirements
-check_req
-
-# Some presets
-api_endpoint="check"
-if [[ $ASYNC_MODE -eq 1 ]]; then
-    api_endpoint="checkAsync"
-fi
-scheme="http"
-if [[ $USE_SSL -eq 1 ]]; then
-    scheme="https"
-fi
-source=""
-if [[ -n $HOSTNAME ]]; then
-    source="?source=${HOSTNAME}"
-fi
-
-# Loop over filesystem
-for scandir in "${SCAN_FOLDERS[@]}";
-do
-    find "$scandir" -type f  -mtime -$MAX_AGE 2> /dev/null | while read -r file_path
-    do
-        if [ -f "${file_path}" ]; then
-            # Check Size
-            filesize=$(du -k "$file_path" | cut -f1)
-            if [ "${filesize}" -gt $MAX_FILE_SIZE ]; then
+        # Remove any control characters (0x00-0x1f, 0x7f) — if the result differs, reject it
+        local _sanitized
+        _sanitized="$(printf '%s' "$scan_id_out" | tr -d '\000-\037\177')"
+        if [ "$_sanitized" != "$scan_id_out" ]; then
+            scan_id_out=""
+        fi
+    fi
+
+    printf '%s' "$scan_id_out"
+    return "$_marker_rc"
+}
+
+# submit_file: upload one file, retrying on failure.
+# Args: $1=endpoint URL  $2=path of the file to upload
+# Returns: 0 on success (always 0 in dry-run mode, where nothing is sent),
+#          otherwise the return code of the last upload attempt.
+# Ordinary failures are retried up to RETRIES times with a doubling delay
+# (2s, 4s, ... capped at 60s). Return code 93 from the upload function means
+# 503 back-pressure and is retried on a separate budget of max_503_retries.
+submit_file() {
+    local endpoint="$1"
+    local filepath="$2"
+    local filename
+    local try=1
+    local rc=1
+    local wait=2
+    local max_503_retries=5
+    local _503_count=0
+
+    # Preserve client-side path in multipart filename for server-side audit logs.
+    filename="$filepath"
+
+    if [ "$DRY_RUN" -eq 1 ]; then
+        log_msg info "DRY-RUN: would submit '$filepath'"
+        return 0
+    fi
+
+    while [ "$try" -le "$RETRIES" ]; do
+        if [ "$UPLOAD_TOOL" = "curl" ]; then
+            upload_with_curl "$endpoint" "$filepath" "$filename"
+            rc=$?
+        else
+            upload_with_wget "$endpoint" "$filepath" "$filename"
+            rc=$?
+        fi
+
+        if [ "$rc" -eq 0 ]; then
+            return 0
+        fi
+
+        # 503 back-pressure: sleep already happened in upload function,
+        # retry without counting against the normal retry budget (up to a cap)
+        # NOTE(review): the upload functions only sleep when the response has a
+        # usable Retry-After header; without one this retries immediately.
+        if [ "$rc" -eq 93 ]; then
+            _503_count=$((_503_count + 1))
+            if [ "$_503_count" -lt "$max_503_retries" ]; then
+                log_msg warn "Retrying '$filepath' after 503 back-pressure ($_503_count/$max_503_retries)"
                 continue
             fi
-            log debug "Submitting ${file_path} ..."
-            successful=0
-
-            for retry in {1..3}; do
-                # Submit sample
-                result=$(curl -s -X POST \
-                        "$scheme://$THUNDERSTORM_SERVER:$THUNDERSTORM_PORT/api/$api_endpoint$source" \
-                        --form "file=@${file_path};filename=${file_path}")
-                curl_exit=$?
-                if [ $curl_exit -ne 0 ]; then
-                    log error "Upload failed with code $curl_exit"
-                    sleep $((2 << retry))
-                    continue
-                fi
+            log_msg warn "Too many 503 responses for '$filepath', giving up"
+            return "$rc"
+        fi
+
+        log_msg warn "Upload failed for '$filepath' (attempt ${try}/${RETRIES}, code ${rc})"
+        # No delay after the final attempt.
+        if [ "$try" -lt "$RETRIES" ]; then
+            sleep "$wait"
+            wait=$((wait * 2))
+            # Cap backoff at 60 seconds
+            [ "$wait" -gt 60 ] && wait=60
+        fi
+        try=$((try + 1))
+    done
+
+    return "$rc"
+}
 
-                # If 'reason' in result
-                if [ "${result/reason}" != "$result" ]; then
-                    log error "$result"
-                    sleep $((2 << retry))
-                    continue
+# parse_args ARG...
+#
+# Parses the collector's command-line arguments into the global
+# configuration variables (THUNDERSTORM_SERVER, THUNDERSTORM_PORT,
+# SCAN_FOLDERS, MAX_AGE, ...). Options that take a value consume the
+# following argument and abort via die() when it is missing or empty.
+# The first directory given (via -d/--dir or positionally) replaces the
+# current SCAN_FOLDERS list; later ones are appended. "--" ends option
+# parsing: every argument after it is taken as a directory, even if it
+# starts with "-". Unknown options abort via die(); -h/--help prints the
+# help text and exits 0.
+parse_args() {
+    local arg
+    local add_dir_mode=0
+
+    while [ $# -gt 0 ]; do
+        arg="$1"
+        case "$arg" in
+            -h|--help)
+                print_help
+                exit 0
+                ;;
+            -s|--server)
+                [ -n "${2:-}" ] || die "Missing value for $arg"
+                THUNDERSTORM_SERVER="$2"
+                shift
+                ;;
+            -p|--port)
+                [ -n "${2:-}" ] || die "Missing value for $arg"
+                THUNDERSTORM_PORT="$2"
+                shift
+                ;;
+            -d|--dir)
+                [ -n "${2:-}" ] || die "Missing value for $arg"
+                if [ "$add_dir_mode" -eq 0 ]; then
+                    SCAN_FOLDERS=()
+                    add_dir_mode=1
                 fi
-                successful=1
+                SCAN_FOLDERS+=("$2")
+                shift
+                ;;
+            --max-age)
+                [ -n "${2:-}" ] || die "Missing value for $arg"
+                MAX_AGE="$2"
+                shift
+                ;;
+            --max-size-kb)
+                [ -n "${2:-}" ] || die "Missing value for $arg"
+                MAX_FILE_SIZE_KB="$2"
+                shift
+                ;;
+            --source)
+                [ -n "${2:-}" ] || die "Missing value for $arg"
+                SOURCE_NAME="$2"
+                shift
+                ;;
+            --ssl)
+                USE_SSL=1
+                ;;
+            -k|--insecure)
+                INSECURE=1
+                ;;
+            --ca-cert)
+                [ -n "${2:-}" ] || die "Missing value for $arg"
+                CA_CERT="$2"
+                USE_SSL=1
+                shift
+                ;;
+            --sync)
+                ASYNC_MODE=0
+                ;;
+            --retries)
+                [ -n "${2:-}" ] || die "Missing value for $arg"
+                RETRIES="$2"
+                shift
+                ;;
+            --dry-run)
+                DRY_RUN=1
+                ;;
+            --debug)
+                DEBUG=1
+                ;;
+            --log-file)
+                [ -n "${2:-}" ] || die "Missing value for $arg"
+                LOGFILE="$2"
+                shift
+                ;;
+            --no-log-file)
+                LOG_TO_FILE=0
+                ;;
+            --syslog)
+                LOG_TO_SYSLOG=1
+                ;;
+            --quiet)
+                LOG_TO_CMDLINE=0
+                ;;
+            --progress)
+                PROGRESS_MODE="on"
+                ;;
+            --no-progress)
+                PROGRESS_MODE="off"
+                ;;
+            --)
+                shift
                 break
-            done
-            if [ $successful -ne 1 ]; then
-                log error "Could not upload ${file_path}"
-            fi
+                ;;
+            -*)
+                die "Unknown option: $arg (use --help)"
+                ;;
+            *)
+                # Positional args are treated as additional directories.
+                if [ "$add_dir_mode" -eq 0 ]; then
+                    SCAN_FOLDERS=()
+                    add_dir_mode=1
+                fi
+                SCAN_FOLDERS+=("$arg")
+                ;;
+        esac
+        shift
+    done
+
+    # The loop only leaves arguments behind after "--" (shift + break).
+    # Treat each of them as a directory instead of silently dropping it.
+    for arg in "$@"; do
+        if [ "$add_dir_mode" -eq 0 ]; then
+            SCAN_FOLDERS=()
+            add_dir_mode=1
+        fi
+        SCAN_FOLDERS+=("$arg")
+    done
+}
+
+# validate_config
+#
+# Sanity-checks the global configuration and aborts via die() on the first
+# violation:
+#   - port, max-age, max-size-kb and retries must pass is_integer
+#   - port must be in 1..65535, max-age >= 0, max-size-kb > 0, retries >= 1
+#   - the server name must be non-empty
+#   - at least one scan folder must be configured
+#   - a configured CA certificate must be an existing regular file
+# Combining --ca-cert with --insecure is allowed but logged as a warning.
+validate_config() {
+    is_integer "$THUNDERSTORM_PORT" || die "Port must be numeric: '$THUNDERSTORM_PORT'"
+    is_integer "$MAX_AGE" || die "max-age must be numeric: '$MAX_AGE'"
+    is_integer "$MAX_FILE_SIZE_KB" || die "max-size-kb must be numeric: '$MAX_FILE_SIZE_KB'"
+    is_integer "$RETRIES" || die "retries must be numeric: '$RETRIES'"
+
+    [ "$THUNDERSTORM_PORT" -gt 0 ] || die "Port must be greater than 0"
+    # TCP ports are 16-bit; reject out-of-range values here rather than
+    # failing later on every single upload attempt.
+    [ "$THUNDERSTORM_PORT" -le 65535 ] || die "Port must be at most 65535"
+    [ "$MAX_AGE" -ge 0 ] || die "max-age must be >= 0"
+    [ "$MAX_FILE_SIZE_KB" -gt 0 ] || die "max-size-kb must be > 0"
+    [ "$RETRIES" -ge 1 ] || die "retries must be >= 1"
+
+    [ -n "$THUNDERSTORM_SERVER" ] || die "Server must not be empty"
+    if [ "${#SCAN_FOLDERS[@]}" -eq 0 ]; then
+        die "At least one directory is required"
+    fi
+    if [ -n "$CA_CERT" ] && [ ! -f "$CA_CERT" ]; then
+        die "CA certificate file not found: '$CA_CERT'"
+    fi
+    if [ -n "$CA_CERT" ] && [ "$INSECURE" -eq 1 ]; then
+        log_msg warn "--ca-cert and --insecure are both set; --insecure takes precedence"
+    fi
+}
+
+# main ARG...
+#
+# Script entry point. Parses and validates the configuration, announces the
+# collection to the server with a "begin" marker (skipped in dry-run mode),
+# enumerates candidate files below every scan folder with find, submits each
+# file that passes the cloud-path and size filters, and finally sends an
+# "end" marker carrying the run statistics.
+#
+# Returns 0 when no upload failed and 1 otherwise. Exits with status 2 when
+# no upload tool is available, a temp file cannot be created, or the begin
+# marker fails.
+main() {
+    local scheme="http"
+    local endpoint_name="check"
+    local query_source=""
+    local api_endpoint=""
+    local base_url=""
+    local scandir
+    local file_path
+    local size_kb
+    local elapsed=0
+    local find_mtime
+    local find_results_file
+
+    parse_args "$@"
+    detect_source_name
+    validate_config
+    print_banner
+
+    if [ "$(id -u 2>/dev/null || echo 1)" != "0" ]; then
+        log_msg warn "Running without root privileges; some files may be inaccessible"
+    fi
+
+    if [ "$USE_SSL" -eq 1 ]; then
+        scheme="https"
+    fi
+    CURL_EXTRA_OPTS=()
+    WGET_EXTRA_OPTS=()
+    if [ "$INSECURE" -eq 1 ]; then
+        CURL_EXTRA_OPTS+=("-k")
+        WGET_EXTRA_OPTS+=("--no-check-certificate")
+    fi
+    if [ -n "$CA_CERT" ]; then
+        CURL_EXTRA_OPTS+=("--cacert" "$CA_CERT")
+        WGET_EXTRA_OPTS+=("--ca-certificate=$CA_CERT")
+    fi
+    if [ "$ASYNC_MODE" -eq 1 ]; then
+        endpoint_name="checkAsync"
+    fi
+
+    query_source="$(build_query_source "$SOURCE_NAME")"
+    base_url="${scheme}://${THUNDERSTORM_SERVER}:${THUNDERSTORM_PORT}"
+    # Strip any trailing slash from base_url
+    base_url="${base_url%/}"
+    api_endpoint="${base_url}/api/${endpoint_name}${query_source}"
+
+    # In dry-run mode a missing upload tool is not an error.
+    if [ "$DRY_RUN" -eq 1 ]; then
+        detect_upload_tool || true
+    fi
+
+    log_msg info "Started Thunderstorm Collector - Version $VERSION"
+    log_msg info "Server: $THUNDERSTORM_SERVER"
+    log_msg info "Port: $THUNDERSTORM_PORT"
+    log_msg info "API endpoint: $api_endpoint"
+    log_msg info "Max age (days): $MAX_AGE"
+    log_msg info "Max size (KB): $MAX_FILE_SIZE_KB"
+    log_msg info "Source: $SOURCE_NAME"
+    log_msg info "Folders: ${SCAN_FOLDERS[*]}"
+    [ "$DRY_RUN" -eq 1 ] && log_msg info "Dry-run mode enabled"
+
+    # Send collection begin marker; capture scan_id if server returns one
+    if [ "$DRY_RUN" -eq 0 ]; then
+        if ! detect_upload_tool; then
+            log_msg error "Neither 'curl' nor 'wget' is installed; unable to upload samples"
+            exit 2
+        fi
+        local _begin_resp_file
+        # Declared before the call on purpose: "local" is itself a command
+        # that resets $?, so declaring it between collection_marker and the
+        # assignment would always record status 0 and hide a failed marker.
+        local _begin_rc
+        _begin_resp_file="$(mktemp_portable)" || { log_msg error "Cannot create temp file"; exit 2; }
+        TMP_FILES_ARR+=("$_begin_resp_file")
+        collection_marker "$base_url" "begin" "" "" > "$_begin_resp_file"
+        _begin_rc=$?
+        SCAN_ID="$(cat "$_begin_resp_file" 2>/dev/null)"
+        # If the begin marker failed after retry, the server is unreachable — fatal error
+        if [ "$_begin_rc" -ne 0 ]; then
+            log_msg error "Cannot connect to Thunderstorm server at ${base_url} (begin marker failed after retry)"
+            exit 2
+        fi
+        if [ -n "$SCAN_ID" ]; then
+            log_msg info "Collection scan_id: $SCAN_ID"
+            # Append scan_id with the right separator depending on whether
+            # the endpoint URL already carries a query string.
+            case "$api_endpoint" in
+                *\?*) api_endpoint="${api_endpoint}&scan_id=$(urlencode "$SCAN_ID")" ;;
+                *)    api_endpoint="${api_endpoint}?scan_id=$(urlencode "$SCAN_ID")" ;;
+            esac
+        fi
+    else
+        log_msg info "Dry-run mode: skipping server connection"
+    fi
+
+    # Determine progress display mode
+    if [ "$PROGRESS_MODE" = "on" ]; then
+        SHOW_PROGRESS=1
+    elif [ "$PROGRESS_MODE" = "off" ]; then
+        SHOW_PROGRESS=0
+    elif [ -t 2 ]; then
+        SHOW_PROGRESS=1
+    else
+        SHOW_PROGRESS=0
+    fi
+
+    # Build find exclusions once (shared across all scan dirs)
+    local find_excludes=()
+    local _ep
+    for _ep in "${EXCLUDE_PATHS[@]}"; do
+        [ -d "$_ep" ] && find_excludes+=(-path "$_ep" -prune -o)
+    done
+    local _mount_list
+    _mount_list="$(get_excluded_mounts)"
+    if [ -n "$_mount_list" ]; then
+        while IFS= read -r _ep; do
+            [ -n "$_ep" ] && [ -d "$_ep" ] && find_excludes+=(-path "$_ep" -prune -o)
+        done <<< "$_mount_list"
+    fi
+
+    # Prune known cloud storage directory names at the find level so they are
+    # excluded from both the file count and processing (keeps progress accurate).
+    local _cloud_name
+    for _cloud_name in $CLOUD_DIR_NAMES; do
+        find_excludes+=(\( -iname "$_cloud_name" -type d -prune \) -o)
+    done
+    # The next two lists are split on '|' instead of whitespace.
+    local _old_ifs="$IFS"
+    IFS='|'
+    for _cloud_name in $CLOUD_DIR_NAMES_SPACED; do
+        find_excludes+=(\( -iname "$_cloud_name" -type d -prune \) -o)
+    done
+    for _cloud_name in $CLOUD_DIR_PATTERNS; do
+        find_excludes+=(\( -iname "${_cloud_name}*" -type d -prune \) -o)
+    done
+    IFS="$_old_ifs"
+    # Also prune macOS CloudStorage
+    find_excludes+=(\( -iname "CloudStorage" -path "*/Library/CloudStorage" -type d -prune \) -o)
+
+    # First pass: collect all file lists and count total files for progress
+    local all_find_files=()
+    for scandir in "${SCAN_FOLDERS[@]}"; do
+        if [ ! -d "$scandir" ]; then
+            log_msg warn "Skipping non-directory path '$scandir'"
+            continue
         fi
-    done 
-done
-exit 0
\ No newline at end of file
+
+        log_msg info "Scanning '$scandir'"
+        find_results_file="$(mktemp_portable)" || {
+            log_msg error "Could not create temporary file list for '$scandir'"
+            continue
+        }
+        TMP_FILES_ARR+=("$find_results_file")
+        if [ "$MAX_AGE" -gt 0 ]; then
+            find "$scandir" "${find_excludes[@]}" -type f -mtime "-${MAX_AGE}" -print0 > "$find_results_file" 2>/dev/null || true
+        else
+            # MAX_AGE=0 means no age filter — collect all files regardless of modification time
+            find "$scandir" "${find_excludes[@]}" -type f -print0 > "$find_results_file" 2>/dev/null || true
+        fi
+        all_find_files+=("$find_results_file")
+
+        # Count files in this result set (each entry is null-terminated by -print0)
+        local _count=0
+        if [ -s "$find_results_file" ]; then
+            # Count null bytes = number of file entries from -print0
+            _count="$(tr -cd '\0' < "$find_results_file" 2>/dev/null | wc -c)"
+            # Normalize whitespace from wc output
+            _count="${_count//[[:space:]]/}"
+            _count="${_count:-0}"
+        fi
+        TOTAL_FILES=$((TOTAL_FILES + _count))
+    done
+
+    log_msg info "Found $TOTAL_FILES candidate files"
+
+    # Second pass: walk the recorded file lists and submit each candidate.
+    local _processed=0
+    for find_results_file in "${all_find_files[@]}"; do
+        while IFS= read -r -d '' file_path; do
+            # Check for interruption between files
+            [ "$INTERRUPTED" -eq 1 ] && break 2
+
+            _processed=$((_processed + 1))
+
+            # Show progress
+            if [ "$SHOW_PROGRESS" -eq 1 ] && [ "$TOTAL_FILES" -gt 0 ]; then
+                printf '\r[%d/%d] %d%%' "$_processed" "$TOTAL_FILES" "$(( _processed * 100 / TOTAL_FILES ))" >&2
+            fi
+
+            # The file may have disappeared since the find pass.
+            [ -f "$file_path" ] || continue
+
+            FILES_SCANNED=$((FILES_SCANNED + 1))
+
+            # Skip files inside cloud storage folders
+            if is_cloud_path "$file_path"; then
+                FILES_SKIPPED=$((FILES_SKIPPED + 1))
+                log_msg debug "Skipping cloud storage path '$file_path'"
+                continue
+            fi
+
+            size_kb="$(file_size_kb "$file_path")"
+            if [ "$size_kb" -lt 0 ]; then
+                FILES_SKIPPED=$((FILES_SKIPPED + 1))
+                log_msg debug "Skipping unreadable file '$file_path'"
+                continue
+            fi
+
+            if [ "$size_kb" -gt "$MAX_FILE_SIZE_KB" ]; then
+                FILES_SKIPPED=$((FILES_SKIPPED + 1))
+                log_msg debug "Skipping '$file_path' due to size (${size_kb}KB)"
+                continue
+            fi
+
+            log_msg debug "Submitting '$file_path'"
+            if submit_file "$api_endpoint" "$file_path"; then
+                FILES_SUBMITTED=$((FILES_SUBMITTED + 1))
+            else
+                FILES_FAILED=$((FILES_FAILED + 1))
+                log_msg error "Could not upload '$file_path'"
+            fi
+        done < "$find_results_file"
+    done
+
+    if [ "$START_TS" -gt 0 ] 2>/dev/null; then
+        elapsed=$(( $(date +%s 2>/dev/null || echo "$START_TS") - START_TS ))
+        [ "$elapsed" -lt 0 ] && elapsed=0
+    fi
+
+    # Clear progress line if we were showing progress
+    if [ "$SHOW_PROGRESS" -eq 1 ]; then
+        printf '\r\033[K' >&2
+    fi
+
+    log_msg info "Run completed: scanned=$FILES_SCANNED submitted=$FILES_SUBMITTED skipped=$FILES_SKIPPED failed=$FILES_FAILED seconds=$elapsed"
+
+    # Send collection end marker with run statistics
+    if [ "$DRY_RUN" -eq 0 ]; then
+        local stats_json="\"stats\":{\"scanned\":${FILES_SCANNED},\"submitted\":${FILES_SUBMITTED},\"skipped\":${FILES_SKIPPED},\"failed\":${FILES_FAILED},\"elapsed_seconds\":${elapsed}}"
+        collection_marker "$base_url" "end" "$SCAN_ID" "$stats_json" >/dev/null
+    fi
+
+    if [ "$FILES_FAILED" -gt 0 ]; then
+        return 1
+    fi
+    return 0
+}
+
+# Run the collector with the script's arguments and exit with main's status.
+main "$@"
+exit $?