diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c011f90..588666e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -116,3 +116,50 @@ jobs: if: always() working-directory: demos/moveit_pick_place run: docker compose --profile ci down + + build-and-test-multi-ecu: + needs: lint + runs-on: ubuntu-24.04 + steps: + - name: Show triggering source + if: github.event_name == 'repository_dispatch' + run: | + SHA="${{ github.event.client_payload.sha }}" + RUN_URL="${{ github.event.client_payload.run_url }}" + echo "## Triggered by ros2_medkit" >> "$GITHUB_STEP_SUMMARY" + echo "- Commit: \`${SHA:-unknown}\`" >> "$GITHUB_STEP_SUMMARY" + if [ -n "$RUN_URL" ]; then + echo "- Run: [View triggering run]($RUN_URL)" >> "$GITHUB_STEP_SUMMARY" + else + echo "- Run: (URL not provided)" >> "$GITHUB_STEP_SUMMARY" + fi + + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Build and start multi-ECU demo + working-directory: demos/multi_ecu_aggregation + run: docker compose --profile ci up -d --build perception-ecu-ci planning-ecu-ci actuation-ecu-ci + + - name: Run smoke tests + run: ./tests/smoke_test_multi_ecu.sh + + - name: Show perception ECU logs on failure + if: failure() + working-directory: demos/multi_ecu_aggregation + run: docker compose --profile ci logs perception-ecu-ci --tail=200 + + - name: Show planning ECU logs on failure + if: failure() + working-directory: demos/multi_ecu_aggregation + run: docker compose --profile ci logs planning-ecu-ci --tail=200 + + - name: Show actuation ECU logs on failure + if: failure() + working-directory: demos/multi_ecu_aggregation + run: docker compose --profile ci logs actuation-ecu-ci --tail=200 + + - name: Teardown + if: always() + working-directory: demos/multi_ecu_aggregation + run: docker compose --profile ci down diff --git a/.gitignore b/.gitignore index 7cdd647..aaa2fae 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,4 @@ docker-compose.local.yml .env .venv/ venv/ +demos/multi_ecu_aggregation/launch/__pycache__/ diff --git a/README.md b/README.md index 3624498..c21c948 100644 --- a/README.md +++ b/README.md @@ -14,11 +14,12 @@ This repository contains example integrations and demos that show how ros2_medki can be used to add SOVD-compliant diagnostics and fault management to ROS 2-based robots and systems. Each demo builds on real-world scenarios, progressing from simple sensor monitoring -to complete mobile robot integration: +to complete mobile robot integration and multi-ECU peer aggregation: -- **Sensor Diagnostics** — Lightweight demo focusing on data monitoring and fault injection -- **TurtleBot3 Integration** — Full-featured demo with Nav2 navigation, showing entity hierarchy and real-time control -- **MoveIt Pick-and-Place** — Panda 7-DOF arm manipulation with MoveIt 2, fault monitoring for planning, controllers, and joint limits +- **Sensor Diagnostics** - Lightweight demo focusing on data monitoring and fault injection +- **TurtleBot3 Integration** - Full-featured demo with Nav2 navigation, showing entity hierarchy and real-time control +- **MoveIt Pick-and-Place** - Panda 7-DOF arm manipulation with MoveIt 2, fault monitoring for planning, controllers, and joint limits +- **Multi-ECU Aggregation** - Peer aggregation with 3 ECUs (perception, planning, actuation), mDNS discovery, and cross-ECU functions **Key Capabilities Demonstrated:** @@ -29,8 +30,11 @@ to complete mobile robot integration: - ✅ Fault management and injection - ✅ Manifest-based entity discovery - ✅ Legacy diagnostics bridge support +- ✅ Multi-ECU peer aggregation +- ✅ mDNS-based ECU discovery +- ✅ Cross-ECU function grouping -Both demos support: +All demos support: - REST API access via SOVD protocol - Web UI for visualization ([ros2_medkit_web_ui](https://github.com/selfpatch/ros2_medkit_web_ui)) @@ -44,6 +48,7 @@ Both demos support: | [Sensor Diagnostics](demos/sensor_diagnostics/) | Lightweight sensor diagnostics demo (no Gazebo required) | Data monitoring, fault injection, dual fault reporting paths | ✅ Ready | | [TurtleBot3 Integration](demos/turtlebot3_integration/) | Full ros2_medkit integration with TurtleBot3 and Nav2 | SOVD-compliant API, manifest-based discovery, fault management | ✅ Ready | | [MoveIt Pick-and-Place](demos/moveit_pick_place/) | Panda 7-DOF arm with MoveIt 2 manipulation and ros2_medkit | Planning fault detection, controller monitoring, joint limits | ✅ Ready | +| [Multi-ECU Aggregation](demos/multi_ecu_aggregation/) | Multi-ECU peer aggregation with 3 ECUs (perception, planning, actuation), mDNS discovery, cross-ECU functions | Peer aggregation, mDNS discovery, cross-ECU functions | ✅ Ready | ### Quick Start @@ -123,6 +128,28 @@ cd demos/moveit_pick_place - 5 fault injection scenarios with one-click scripts - SOVD-compliant REST API with rich entity hierarchy (4 areas, 7 components) +#### Multi-ECU Aggregation Demo (Advanced - Peer Aggregation) + +Multi-ECU demo with 3 independent ECUs aggregated via mDNS discovery: + +```bash +cd demos/multi_ecu_aggregation +./run-demo.sh +./check-demo.sh # Verify all 3 ECUs are connected +# Gateway at http://localhost:8080, Web UI at http://localhost:3000 + +# To stop +./stop-demo.sh +``` + +**Features:** + +- 3 independent ECUs (perception, planning, actuation) each running ros2_medkit +- Peer aggregation via mDNS-based automatic ECU discovery +- Cross-ECU function grouping across the full system +- Unified SOVD-compliant REST API spanning all ECUs +- Web UI for browsing aggregated entity hierarchy + ## Getting Started ### Prerequisites @@ -144,7 +171,7 @@ Each demo has its own README with specific instructions. See above Quick Start, or follow the detailed README in each demo directory: ```bash -cd demos/sensor_diagnostics # or turtlebot3_integration +cd demos/sensor_diagnostics # or turtlebot3_integration, moveit_pick_place, multi_ecu_aggregation # Follow the README.md in that directory ``` @@ -184,7 +211,7 @@ Each demo has automated smoke tests that verify the gateway starts and the REST ./tests/smoke_test_moveit.sh # MoveIt pick-and-place (discovery, data, operations, scripts, triggers, logs) ``` -CI runs all 3 demos in parallel - each job builds the Docker image, starts the container, and runs the smoke tests against it. See [CI workflow](.github/workflows/ci.yml). +CI runs all 4 demos in parallel - each job builds the Docker image, starts the container, and runs the smoke tests against it. See [CI workflow](.github/workflows/ci.yml). ## Related Projects diff --git a/demos/multi_ecu_aggregation/CMakeLists.txt b/demos/multi_ecu_aggregation/CMakeLists.txt new file mode 100644 index 0000000..dca84f5 --- /dev/null +++ b/demos/multi_ecu_aggregation/CMakeLists.txt @@ -0,0 +1,68 @@ +cmake_minimum_required(VERSION 3.8) +project(multi_ecu_demo) + +if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + add_compile_options(-Wall -Wextra -Wpedantic) +endif() + +find_package(ament_cmake REQUIRED) +find_package(rclcpp REQUIRED) +find_package(std_msgs REQUIRED) +find_package(sensor_msgs REQUIRED) +find_package(geometry_msgs REQUIRED) +find_package(nav_msgs REQUIRED) +find_package(vision_msgs REQUIRED) +find_package(diagnostic_msgs REQUIRED) +find_package(rcl_interfaces REQUIRED) +find_package(ros2_medkit_msgs REQUIRED) + +# --- Perception ECU nodes --- +add_executable(lidar_driver src/perception/lidar_driver.cpp) +ament_target_dependencies(lidar_driver rclcpp sensor_msgs diagnostic_msgs rcl_interfaces) + +add_executable(camera_driver src/perception/camera_driver.cpp) +ament_target_dependencies(camera_driver rclcpp sensor_msgs diagnostic_msgs rcl_interfaces) + +add_executable(point_cloud_filter src/perception/point_cloud_filter.cpp) +ament_target_dependencies(point_cloud_filter rclcpp sensor_msgs diagnostic_msgs rcl_interfaces) + +add_executable(object_detector src/perception/object_detector.cpp) +ament_target_dependencies(object_detector rclcpp sensor_msgs vision_msgs diagnostic_msgs rcl_interfaces) + +# --- Planning ECU nodes --- +add_executable(path_planner src/planning/path_planner.cpp) +ament_target_dependencies(path_planner rclcpp nav_msgs geometry_msgs vision_msgs diagnostic_msgs rcl_interfaces) + +add_executable(behavior_planner src/planning/behavior_planner.cpp) +ament_target_dependencies(behavior_planner rclcpp nav_msgs geometry_msgs diagnostic_msgs rcl_interfaces) + +add_executable(task_scheduler src/planning/task_scheduler.cpp) +ament_target_dependencies(task_scheduler rclcpp std_msgs diagnostic_msgs rcl_interfaces) + +# --- Actuation ECU nodes --- +add_executable(motor_controller src/actuation/motor_controller.cpp) +ament_target_dependencies(motor_controller rclcpp sensor_msgs geometry_msgs diagnostic_msgs rcl_interfaces) + +add_executable(joint_driver src/actuation/joint_driver.cpp) +ament_target_dependencies(joint_driver rclcpp sensor_msgs diagnostic_msgs rcl_interfaces) + +add_executable(gripper_controller src/actuation/gripper_controller.cpp) +ament_target_dependencies(gripper_controller rclcpp sensor_msgs geometry_msgs diagnostic_msgs rcl_interfaces) + +# --- Install --- +install(TARGETS + lidar_driver camera_driver point_cloud_filter object_detector + path_planner behavior_planner task_scheduler + motor_controller joint_driver gripper_controller + DESTINATION lib/${PROJECT_NAME} +) + +install(DIRECTORY launch/ DESTINATION share/${PROJECT_NAME}/launch) +install(DIRECTORY config/ DESTINATION share/${PROJECT_NAME}/config) + +if(BUILD_TESTING) + find_package(ament_lint_auto REQUIRED) + ament_lint_auto_find_test_dependencies() +endif() + +ament_package() diff --git a/demos/multi_ecu_aggregation/Dockerfile b/demos/multi_ecu_aggregation/Dockerfile new file mode 100644 index 0000000..0d39be7 --- /dev/null +++ b/demos/multi_ecu_aggregation/Dockerfile @@ -0,0 +1,71 @@ +# Multi-ECU Aggregation Demo +FROM ros:jazzy-ros-base + +ENV DEBIAN_FRONTEND=noninteractive +ENV ROS_DISTRO=jazzy +ENV COLCON_WS=/root/demo_ws + +# Install minimal dependencies (no GUI/simulation) +RUN apt-get update && apt-get install -y --no-install-recommends \ + ros-${ROS_DISTRO}-vision-msgs \ + ros-${ROS_DISTRO}-nav-msgs \ + ros-${ROS_DISTRO}-domain-bridge \ + ros-${ROS_DISTRO}-ament-lint-auto \ + ros-${ROS_DISTRO}-ament-lint-common \ + ros-${ROS_DISTRO}-ament-cmake-gtest \ + ros-${ROS_DISTRO}-yaml-cpp-vendor \ + ros-${ROS_DISTRO}-example-interfaces \ + python3-colcon-common-extensions \ + python3-requests \ + nlohmann-json3-dev \ + libcpp-httplib-dev \ + libsystemd-dev \ + sqlite3 \ + libsqlite3-dev \ + git \ + curl \ + jq \ + && rm -rf /var/lib/apt/lists/* + +# Clone ros2_medkit from GitHub (gateway + dependencies + plugins) +ARG ROS2_MEDKIT_REF=main +WORKDIR ${COLCON_WS}/src +RUN git clone --depth 1 --branch ${ROS2_MEDKIT_REF} https://github.com/selfpatch/ros2_medkit.git && \ + mv ros2_medkit/src/ros2_medkit_cmake . && \ + mv ros2_medkit/src/ros2_medkit_gateway . && \ + mv ros2_medkit/src/ros2_medkit_serialization . && \ + mv ros2_medkit/src/ros2_medkit_msgs . && \ + mv ros2_medkit/src/ros2_medkit_fault_manager . && \ + mv ros2_medkit/src/ros2_medkit_fault_reporter . && \ + mv ros2_medkit/src/ros2_medkit_diagnostic_bridge . && \ + mv ros2_medkit/src/ros2_medkit_plugins/ros2_medkit_graph_provider . && \ + rm -rf ros2_medkit + +# Copy demo package +COPY package.xml CMakeLists.txt ${COLCON_WS}/src/multi_ecu_demo/ +COPY src/ ${COLCON_WS}/src/multi_ecu_demo/src/ +COPY config/ ${COLCON_WS}/src/multi_ecu_demo/config/ +COPY launch/ ${COLCON_WS}/src/multi_ecu_demo/launch/ + +# Copy container scripts +COPY container_scripts/ /var/lib/ros2_medkit/scripts/ +RUN find /var/lib/ros2_medkit/scripts -name "*.bash" -exec chmod +x {} \; + +# Build all packages (skip test dependencies that aren't in ros-base) +WORKDIR ${COLCON_WS} +RUN bash -c "source /opt/ros/jazzy/setup.bash && \ + rosdep update && \ + rosdep install --from-paths src --ignore-src -r -y \ + --skip-keys='ament_cmake_clang_format ament_cmake_clang_tidy test_msgs example_interfaces sqlite3' && \ + colcon build --symlink-install --cmake-args -DBUILD_TESTING=OFF" + +# Setup environment +RUN echo "source /opt/ros/jazzy/setup.bash" >> ~/.bashrc && \ + echo "source ${COLCON_WS}/install/setup.bash" >> ~/.bashrc + +# Expose gateway port +EXPOSE 8080 + +# ECU_LAUNCH env var selects which launch file to run +ENV ECU_LAUNCH=perception.launch.py +CMD ["bash", "-c", "mkdir -p /var/lib/ros2_medkit/rosbags && source /opt/ros/jazzy/setup.bash && source /root/demo_ws/install/setup.bash && ros2 launch multi_ecu_demo \"${ECU_LAUNCH}\""] diff --git a/demos/multi_ecu_aggregation/README.md b/demos/multi_ecu_aggregation/README.md new file mode 100644 index 0000000..6e052a1 --- /dev/null +++ b/demos/multi_ecu_aggregation/README.md @@ -0,0 +1,436 @@ +# Multi-ECU Aggregation Demo + +Multi-ECU diagnostics demo for **ros2_medkit** - 3 independent ECUs with peer aggregation, no Gazebo required! + +This demo showcases ros2_medkit's peer aggregation feature with 3 ECU containers (perception, planning, actuation) running separate ROS 2 domains. The perception ECU acts as the aggregator, merging entity models from planning (via static peer URL) and actuation (via mDNS auto-discovery) into a unified SOVD view. + +## Prerequisites + +The host-side scripts (`inject-*.sh`, `restore-normal.sh`) require `curl` and `jq` to be installed on your machine. + +- **Docker** (with Docker Compose) +- **curl** +- **jq** + +## Quick Start + +```bash +# Start the demo (builds 3 ECU containers + web UI) +./run-demo.sh + +# Wait ~30s for all ECUs to boot, then verify aggregation +curl http://localhost:8080/api/v1/health | jq '.peers' + +# Expected: 2 peers (planning-ecu via static URL, actuation-ecu via mDNS) + +# Inject faults +./inject-sensor-failure.sh # LiDAR failure on Perception ECU +./inject-planning-delay.sh # Path planning delay on Planning ECU +./inject-gripper-jam.sh # Gripper jam on Actuation ECU +./inject-cascade-failure.sh # All of the above at once +./restore-normal.sh # Reset everything + +# Stop the demo +./stop-demo.sh +``` + +**Web UI:** Open http://localhost:3000 to browse the aggregated entity tree. + +## Architecture + +``` + Docker Network (medkit-net) + ┌──────────────────────────────────────────────────────────────────────────┐ + │ │ + │ ┌─────────────────────┐ static URL ┌──────────────────────┐ │ + │ │ Perception ECU │◄──────────────│ Planning ECU │ │ + │ │ (ROS_DOMAIN_ID=10) │ │ (ROS_DOMAIN_ID=20) │ │ + │ │ │ │ │ │ + │ │ lidar_driver │ detections │ path_planner │ │ + │ │ camera_driver │──────────────►│ behavior_planner │ │ + │ │ point_cloud_filter │ domain_bridge│ task_scheduler │ │ + │ │ object_detector │ │ domain_bridge │ │ + │ │ │ │ │ │ + │ │ Gateway :8080 ◄─────┼──── Aggregator│ Gateway :8080 │ │ + │ │ (port exposed) │ │ (internal only) │ │ + │ └─────────────────────┘ └──────────┬───────────┘ │ + │ ▲ │ │ + │ │ mDNS discover │ commands │ + │ │ │ domain_bridge │ + │ ┌────────┴────────────┐ ▼ │ + │ │ Actuation ECU │◄─────────────────────────┘ │ + │ │ (ROS_DOMAIN_ID=30) │ │ + │ │ │ │ + │ │ motor_controller │ │ + │ │ joint_driver │ │ + │ │ gripper_controller │ │ + │ │ domain_bridge │ │ + │ │ │ │ + │ │ Gateway :8080 │ │ + │ │ (mDNS announce) │ │ + │ └─────────────────────┘ │ + │ │ + │ ┌─────────────────────┐ │ + │ │ Web UI :3000 │──── connects to Perception ECU gateway │ + │ └─────────────────────┘ │ + └──────────────────────────────────────────────────────────────────────────┘ + │ + ▼ + Host: localhost:8080 (API), localhost:3000 (UI) +``` + +| Container | ROS_DOMAIN_ID | Role | Port | +|-----------|:---:|------|------| +| **perception-ecu** | 10 | Aggregator - pulls from peers | 8080 (exposed to host) | +| **planning-ecu** | 20 | Peer - discovered via static URL | 8080 (internal) | +| **actuation-ecu** | 30 | Peer - discovered via mDNS announce | 8080 (internal) | +| **medkit-web-ui** | - | Entity browser UI | 3000 (exposed to host) | + +## DDS Isolation + +Each ECU runs in its own DDS domain to simulate physically separate compute units: + +- **Domain 10** - Perception ECU: LiDAR scans, camera frames, detections +- **Domain 20** - Planning ECU: path plans, behavior commands, task schedules +- **Domain 30** - Actuation ECU: motor status, joint positions, gripper state + +Cross-ECU data flows use `ros2_domain_bridge` nodes to relay topics between domains: + +| Bridge | From | To | Topic | Type | +|--------|------|----|-------|------| +| planning_bridge | Domain 10 | Domain 20 | `/perception/detections` | `vision_msgs/msg/Detection2DArray` | +| actuation_bridge | Domain 20 | Domain 30 | `/planning/commands` | `geometry_msgs/msg/Twist` | + +This mirrors a real multi-ECU system where each compute unit has its own DDS network and selected topics are explicitly bridged across domains. + +## Entity Hierarchy (SOVD) + +The aggregated entity model (as seen from the perception ECU gateway at `:8080`) merges entities from all 3 ECUs: + +### Components + +``` +robot-alpha (parent - mobile-robot) +├── perception-ecu (local) +├── planning-ecu (from planning peer) +└── actuation-ecu (from actuation peer) +``` + +All 3 manifests declare `robot-alpha` as the parent component. The aggregator merges them by component ID, creating a single parent with 3 sub-components. + +### Apps (10 total) + +| App | ECU | Category | +|-----|-----|----------| +| lidar-driver | Perception | sensor | +| camera-driver | Perception | sensor | +| point-cloud-filter | Perception | processing | +| object-detector | Perception | processing | +| path-planner | Planning | planning | +| behavior-planner | Planning | planning | +| task-scheduler | Planning | planning | +| motor-controller | Actuation | actuation | +| joint-driver | Actuation | actuation | +| gripper-controller | Actuation | actuation | + +### Functions (3 cross-ECU) + +Functions with the same ID across ECUs are merged. The aggregator combines `hosted_by` lists from all peers: + +| Function | Category | Perception Apps | Planning Apps | Actuation Apps | +|----------|----------|-----------------|---------------|----------------| +| **autonomous-navigation** | navigation | object-detector, point-cloud-filter | path-planner, behavior-planner | motor-controller, joint-driver | +| **obstacle-avoidance** | safety | lidar-driver, object-detector | - | motor-controller, gripper-controller | +| **task-execution** | execution | - | task-scheduler, behavior-planner | motor-controller, joint-driver, gripper-controller | + +This demonstrates how SOVD functions provide a **functional view** spanning multiple ECUs - a fault in any participating app degrades the function. + +## Discovery Mechanisms + +The perception ECU aggregator uses two complementary discovery mechanisms to find its peers: + +### Static Peer URLs (Planning ECU) + +The perception gateway config explicitly lists the planning ECU: + +```yaml +aggregation: + peer_urls: ["http://planning-ecu:8080"] + peer_names: ["planning-ecu"] +``` + +This is the simplest approach - hardcode peer addresses when the network topology is known. + +### mDNS Auto-Discovery (Actuation ECU) + +The actuation ECU announces itself via mDNS: + +```yaml +# actuation_params.yaml +aggregation: + announce: true + mdns_service: "_medkit._tcp.local" +``` + +The perception ECU discovers it by listening for mDNS announcements: + +```yaml +# perception_params.yaml +aggregation: + discover: true + mdns_service: "_medkit._tcp.local" +``` + +### Verifying Peer Discovery + +```bash +# Check which peers the aggregator has discovered +curl http://localhost:8080/api/v1/health | jq '.peers' + +# Expected output: +# [ +# { "name": "planning-ecu", "url": "http://planning-ecu:8080", "status": "connected" }, +# { "name": "actuation-ecu", "url": "http://actuation-ecu:8080", "status": "connected" } +# ] +``` + +## REST API Examples + +All requests go through the perception ECU gateway at `localhost:8080`, which transparently proxies to remote ECUs. + +### Health and Peers + +```bash +# Gateway health with peer status +curl http://localhost:8080/api/v1/health | jq + +# Peer list +curl http://localhost:8080/api/v1/health | jq '.peers' +``` + +### Components + +```bash +# All components (merged from 3 ECUs) +curl http://localhost:8080/api/v1/components | jq + +# Parent component +curl http://localhost:8080/api/v1/components/robot-alpha | jq + +# Sub-components (3 ECUs) +curl http://localhost:8080/api/v1/components/robot-alpha/subcomponents | jq +``` + +### Apps + +```bash +# All 10 apps from all ECUs +curl http://localhost:8080/api/v1/apps | jq + +# Local app (perception ECU) +curl http://localhost:8080/api/v1/apps/lidar-driver | jq + +# Remote app (transparently proxied to planning ECU) +curl http://localhost:8080/api/v1/apps/path-planner | jq + +# Remote app data (proxied to actuation ECU) +curl http://localhost:8080/api/v1/apps/motor-controller/data | jq +``` + +### Functions (Cross-ECU) + +```bash +# All merged functions +curl http://localhost:8080/api/v1/functions | jq + +# Function detail - shows apps from all 3 ECUs +curl http://localhost:8080/api/v1/functions/autonomous-navigation | jq +``` + +### Faults + +```bash +# Aggregated faults from all ECUs +curl http://localhost:8080/api/v1/faults | jq + +# Faults for a specific component +curl http://localhost:8080/api/v1/components/perception-ecu/faults | jq +``` + +## Fault Injection Scenarios + +### Scripts + +| Script | Target ECU | Effect | Affected Functions | +|--------|-----------|--------|-------------------| +| `inject-sensor-failure.sh` | Perception | LiDAR failure (high failure probability) | autonomous-navigation, obstacle-avoidance | +| `inject-planning-delay.sh` | Planning | Path planner delay (5000ms processing) | autonomous-navigation | +| `inject-gripper-jam.sh` | Actuation | Gripper jam (stuck closed) | obstacle-avoidance, task-execution | +| `inject-cascade-failure.sh` | All | All of the above combined | All 3 functions degraded | +| `restore-normal.sh` | All | Reset parameters, clear faults | All restored | + +### Demo Walkthrough + +1. **Start the demo** and wait for all peers to connect: + ```bash + ./run-demo.sh + # Wait ~30s, then verify: + curl http://localhost:8080/api/v1/health | jq '.peers' + ``` + +2. **Verify the aggregated entity model** - 3 sub-components, 10 apps, 3 functions: + ```bash + curl http://localhost:8080/api/v1/components/robot-alpha/subcomponents | jq '.items | length' + curl http://localhost:8080/api/v1/apps | jq '.items | length' + curl http://localhost:8080/api/v1/functions | jq '.items | length' + ``` + +3. **Inject a single-ECU fault** and observe how it degrades cross-ECU functions: + ```bash + ./inject-sensor-failure.sh + curl http://localhost:8080/api/v1/faults | jq + curl http://localhost:8080/api/v1/functions/autonomous-navigation | jq + ``` + +4. **Restore and inject a cascade failure** across all ECUs: + ```bash + ./restore-normal.sh + ./inject-cascade-failure.sh + curl http://localhost:8080/api/v1/faults | jq + ``` + +5. **Verify all 3 functions are degraded**: + ```bash + curl http://localhost:8080/api/v1/functions | jq + ``` + +6. **Restore normal operation**: + ```bash + ./restore-normal.sh + curl http://localhost:8080/api/v1/faults | jq '.items | length' + # Expected: 0 + ``` + +## Container Scripts (Scripts API) + +Each ECU has container-side scripts callable via the gateway REST API. The aggregator transparently proxies script execution to remote ECUs. + +### List Available Scripts + +```bash +# Perception ECU scripts +curl http://localhost:8080/api/v1/components/perception-ecu/scripts | jq + +# Planning ECU scripts (proxied through aggregator) +curl http://localhost:8080/api/v1/components/planning-ecu/scripts | jq + +# Actuation ECU scripts (proxied through aggregator) +curl http://localhost:8080/api/v1/components/actuation-ecu/scripts | jq +``` + +### Execute a Script + +```bash +# Execute inject-gripper-jam on actuation ECU (proxied) +curl -X POST http://localhost:8080/api/v1/components/actuation-ecu/scripts/inject-gripper-jam/executions \ + -H "Content-Type: application/json" \ + -d '{"execution_type":"now"}' | jq +``` + +### Available Scripts per ECU + +| ECU | Script | Description | +|-----|--------|-------------| +| **perception-ecu** | `inject-sensor-failure` | Inject LiDAR failure (high failure probability) | +| **perception-ecu** | `restore-normal` | Reset perception parameters and clear faults | +| **planning-ecu** | `inject-planning-delay` | Inject path planning delay (5000ms) | +| **planning-ecu** | `restore-normal` | Reset planning parameters and clear faults | +| **actuation-ecu** | `inject-gripper-jam` | Inject gripper jam (stuck) | +| **actuation-ecu** | `restore-normal` | Reset actuation parameters and clear faults | + +### Override Gateway URL + +```bash +# Point host-side scripts at a non-default gateway +GATEWAY_URL=http://192.168.1.10:8080 ./inject-sensor-failure.sh +``` + +## Options + +### run-demo.sh + +```bash +./run-demo.sh # Daemon mode (default) +./run-demo.sh --attached # Run in foreground with logs +./run-demo.sh --update # Pull latest images before running +./run-demo.sh --no-cache # Build Docker images without cache +``` + +### stop-demo.sh + +```bash +./stop-demo.sh # Stop containers +./stop-demo.sh --volumes # Stop and remove named volumes +./stop-demo.sh --images # Stop and remove images +``` + +## Scripts + +| Script | Description | +|--------|-------------| +| `run-demo.sh` | Start Docker services (3 ECUs + web UI) | +| `stop-demo.sh` | Stop Docker services | +| `inject-sensor-failure.sh` | Inject LiDAR sensor failure on Perception ECU | +| `inject-planning-delay.sh` | Inject path planning delay on Planning ECU | +| `inject-gripper-jam.sh` | Inject gripper jam on Actuation ECU | +| `inject-cascade-failure.sh` | Inject faults across all 3 ECUs | +| `restore-normal.sh` | Reset all ECUs and clear faults | + +> **Note:** All fault injection and restore scripts are also available via the [Scripts API](#container-scripts-scripts-api) - callable as REST endpoints without requiring the host-side scripts. + +## Troubleshooting + +### mDNS Discovery Not Working + +The actuation ECU uses mDNS to announce itself to the perception aggregator. This requires multicast to work on the Docker bridge network. + +- Verify the Docker network supports multicast (the default `bridge` driver does) +- Check that the `medkit-net` network was created: `docker network ls | grep medkit` +- Inspect mDNS traffic: `docker exec perception_ecu avahi-browse -at` (if avahi-utils is installed) +- Fallback: add `actuation-ecu` to `peer_urls` in `perception_params.yaml` as a static peer + +### Gateway Startup Order + +The perception ECU aggregator starts immediately but peers may take a few seconds to boot. Initial requests may show fewer entities until all peers connect. + +- The aggregator retries peer connections periodically +- Wait 20-30 seconds after `./run-demo.sh` before verifying +- Check peer status: `curl http://localhost:8080/api/v1/health | jq '.peers'` + +### Port Conflicts + +The demo exposes port 8080 (gateway) and 3000 (web UI) on the host. + +- If port 8080 is in use: change the port mapping in `docker-compose.yml` under `perception-ecu.ports` +- If port 3000 is in use: change the port mapping under `medkit-web-ui.ports` +- Internal gateway ports (8080 on planning/actuation) are not exposed to the host + +### Containers Not Starting + +```bash +# Check container status +docker compose ps + +# View logs for a specific ECU +docker compose logs perception-ecu +docker compose logs planning-ecu +docker compose logs actuation-ecu + +# Rebuild from scratch +./stop-demo.sh --images +./run-demo.sh --no-cache +``` + +## License + +Apache 2.0 - See [LICENSE](../../LICENSE) diff --git a/demos/multi_ecu_aggregation/check-demo.sh b/demos/multi_ecu_aggregation/check-demo.sh new file mode 100755 index 0000000..213ddab --- /dev/null +++ b/demos/multi_ecu_aggregation/check-demo.sh @@ -0,0 +1,122 @@ +#!/bin/bash +# Multi-ECU Aggregation Demo - Readiness Check +# Waits for the gateway to become healthy, verifies peer connections, and prints entity counts + +set -eu + +GATEWAY_URL="${GATEWAY_URL:-http://localhost:8080}" +API_BASE="${GATEWAY_URL}/api/v1" +TIMEOUT=60 + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[0;33m' +BLUE='\033[0;34m' +BOLD='\033[1m' +NC='\033[0m' + +echo_success() { echo -e "${GREEN}ok${NC} $1"; } +echo_fail() { echo -e "${RED}FAIL${NC} $1"; } +echo_warn() { echo -e "${YELLOW}WARN${NC} $1"; } +echo_info() { echo -e "${BLUE}--${NC} $1"; } + +# Check dependencies +for cmd in curl jq; do + if ! command -v "$cmd" >/dev/null 2>&1; then + echo_fail "Required tool '$cmd' is not installed." + exit 1 + fi +done + +echo "" +echo -e "${BOLD}Multi-ECU Aggregation Demo - Readiness Check${NC}" +echo "==============================================" +echo "" + +# --- 1. Poll health endpoint until it responds --- +echo -n "Waiting for gateway at ${GATEWAY_URL} " +elapsed=0 +while [ "$elapsed" -lt "$TIMEOUT" ]; do + if curl -sf "${API_BASE}/health" >/dev/null 2>&1; then + break + fi + echo -n "." + sleep 2 + elapsed=$((elapsed + 2)) +done +echo "" + +if [ "$elapsed" -ge "$TIMEOUT" ]; then + echo_fail "Gateway did not respond within ${TIMEOUT}s" + echo " Start the demo first: ./run-demo.sh" + exit 1 +fi + +HEALTH_JSON=$(curl -sf "${API_BASE}/health") +status=$(echo "$HEALTH_JSON" | jq -r '.status') +uptime=$(echo "$HEALTH_JSON" | jq -r '.uptime_seconds // "n/a"') +echo_success "Gateway is healthy (status=${status}, uptime=${uptime}s)" +echo "" + +# --- 2. Check aggregation peers --- +echo -e "${BOLD}Peer Connections${NC}" +echo "----------------" + +PEER_COUNT=$(echo "$HEALTH_JSON" | jq '[.peers // [] | .[] ] | length') + +if [ "$PEER_COUNT" -ge 2 ]; then + echo_success "${PEER_COUNT} peers connected" +else + echo_warn "Expected 2 peers, found ${PEER_COUNT}" +fi + +# Print individual peer status +echo "$HEALTH_JSON" | jq -r ' + .peers // [] | .[] | + " \(.name // .url) - \(.status // "unknown")" +' 2>/dev/null || true + +echo "" + +# --- 3. Entity counts --- +echo -e "${BOLD}Entity Counts${NC}" +echo "-------------" + +for entity in areas components apps functions; do + response=$(curl -sf "${API_BASE}/${entity}" 2>/dev/null) || response="" + if [ -n "$response" ]; then + count=$(echo "$response" | jq '.items | length') + echo_info "${entity}: ${count}" + else + echo_warn "${entity}: endpoint unavailable" + fi +done + +echo "" + +# --- 4. Summary --- +echo -e "${BOLD}Summary${NC}" +echo "-------" + +if [ "$PEER_COUNT" -ge 2 ]; then + echo_success "Demo is ready - all peers connected" + echo "" + echo " Web UI: http://localhost:3000" + echo " REST API: ${API_BASE}/" + echo "" + echo " Try:" + echo " curl ${API_BASE}/components | jq '.items[].id'" + echo " curl ${API_BASE}/functions | jq '.items[].id'" + echo " ./inject-cascade-failure.sh" + exit 0 +else + echo_fail "Demo is not fully ready - waiting for peers" + echo "" + echo " Peers may still be booting. Retry in a few seconds:" + echo " ./check-demo.sh" + echo "" + echo " To inspect peer config:" + echo " curl ${API_BASE}/health | jq '.peers'" + exit 1 +fi diff --git a/demos/multi_ecu_aggregation/config/actuation_manifest.yaml b/demos/multi_ecu_aggregation/config/actuation_manifest.yaml new file mode 100644 index 0000000..e7a3cb7 --- /dev/null +++ b/demos/multi_ecu_aggregation/config/actuation_manifest.yaml @@ -0,0 +1,88 @@ +# SOVD Manifest for Actuation ECU +# Defines motor, joint, and gripper control entities +manifest_version: "1.0" + +metadata: + name: "actuation-ecu" + description: "Actuation subsystem - motors, joints, and grippers" + version: "0.1.0" + +config: + unmanifested_nodes: ignore + inherit_runtime_resources: true + +components: + - id: robot-alpha + name: "Robot Alpha" + type: "mobile-robot" + + - id: actuation-ecu + name: "Actuation ECU" + type: "compute-unit" + parent_component_id: robot-alpha + +apps: + - id: motor-controller + name: "Motor Controller" + category: "actuation" + is_located_on: actuation-ecu + ros_binding: + node_name: motor_controller + namespace: /actuation + + - id: joint-driver + name: "Joint Driver" + category: "actuation" + is_located_on: actuation-ecu + ros_binding: + node_name: joint_driver + namespace: /actuation + + - id: gripper-controller + name: "Gripper Controller" + category: "actuation" + is_located_on: actuation-ecu + ros_binding: + node_name: gripper_controller + namespace: /actuation + + # Infrastructure apps + - id: medkit-gateway + name: "Medkit Gateway" + category: "infrastructure" + is_located_on: actuation-ecu + ros_binding: + node_name: ros2_medkit_gateway + namespace: /diagnostics + + - id: medkit-fault-manager + name: "Fault Manager" + category: "infrastructure" + is_located_on: actuation-ecu + ros_binding: + node_name: fault_manager + namespace: / + + - id: medkit-diagnostic-bridge + name: "Diagnostic Bridge" + category: "infrastructure" + is_located_on: actuation-ecu + ros_binding: + node_name: diagnostic_bridge + namespace: /bridge + +functions: + - id: autonomous-navigation + name: "Autonomous Navigation" + category: "navigation" + hosted_by: [motor-controller, joint-driver] + + - id: obstacle-avoidance + name: "Obstacle Avoidance" + category: "safety" + hosted_by: [motor-controller, gripper-controller] + + - id: task-execution + name: "Task Execution" + category: "execution" + hosted_by: [motor-controller, joint-driver, gripper-controller] diff --git a/demos/multi_ecu_aggregation/config/actuation_params.yaml b/demos/multi_ecu_aggregation/config/actuation_params.yaml new file mode 100644 index 0000000..5317f5e --- /dev/null +++ b/demos/multi_ecu_aggregation/config/actuation_params.yaml @@ -0,0 +1,59 @@ +# ros2_medkit gateway configuration for Actuation ECU +# Announces via mDNS for peer discovery by the primary aggregator + +# Gateway runs in /diagnostics namespace +diagnostics: + ros2_medkit_gateway: + ros__parameters: + server: + host: "0.0.0.0" + port: 8080 + + discovery: + mode: "hybrid" + manifest_path: "" # Set via launch argument + manifest_strict_validation: true + runtime: + create_synthetic_components: false + create_synthetic_areas: false + + aggregation: + enabled: true + announce: true + mdns_service: "_medkit._tcp.local" + + cors: + allowed_origins: ["*"] + allowed_methods: ["GET", "PUT", "POST", "DELETE", "OPTIONS"] + + scripts: + scripts_dir: "/var/lib/ros2_medkit/scripts" + +# Actuation node parameters (namespace: /actuation) +actuation: + motor_controller: + ros__parameters: + torque_noise: 0.01 + failure_probability: 0.0 + status_rate: 20.0 + + joint_driver: + ros__parameters: + inject_overheat: false + drift_rate: 0.0 + failure_probability: 0.0 + driver_rate: 50.0 + + gripper_controller: + ros__parameters: + inject_jam: false + failure_probability: 0.0 + gripper_rate: 10.0 + +# Fault Manager configuration (runs in root namespace) +fault_manager: + ros__parameters: + storage_type: "sqlite" + database_path: "/var/lib/ros2_medkit/faults.db" + snapshots: + enabled: false diff --git a/demos/multi_ecu_aggregation/config/domain_bridge/actuation_bridge.yaml b/demos/multi_ecu_aggregation/config/domain_bridge/actuation_bridge.yaml new file mode 100644 index 0000000..f6a9783 --- /dev/null +++ b/demos/multi_ecu_aggregation/config/domain_bridge/actuation_bridge.yaml @@ -0,0 +1,6 @@ +name: actuation_bridge +from_domain: 20 +to_domain: 30 +topics: + /planning/commands: + type: "geometry_msgs/msg/Twist" diff --git a/demos/multi_ecu_aggregation/config/domain_bridge/planning_bridge.yaml b/demos/multi_ecu_aggregation/config/domain_bridge/planning_bridge.yaml new file mode 100644 index 0000000..fa61162 --- /dev/null +++ b/demos/multi_ecu_aggregation/config/domain_bridge/planning_bridge.yaml @@ -0,0 +1,6 @@ +name: planning_bridge +from_domain: 10 +to_domain: 20 +topics: + /perception/detections: + type: "vision_msgs/msg/Detection2DArray" diff --git a/demos/multi_ecu_aggregation/config/perception_manifest.yaml b/demos/multi_ecu_aggregation/config/perception_manifest.yaml new file mode 100644 index 0000000..b44400e --- /dev/null +++ b/demos/multi_ecu_aggregation/config/perception_manifest.yaml @@ -0,0 +1,91 @@ +# SOVD Manifest for Perception ECU +# Defines sensor and detection entities for the perception subsystem +manifest_version: "1.0" + +metadata: + name: "perception-ecu" + description: "Perception subsystem - sensors and detection" + version: "0.1.0" + +config: + unmanifested_nodes: ignore + inherit_runtime_resources: true + +components: + - id: robot-alpha + name: "Robot Alpha" + type: "mobile-robot" + + - id: perception-ecu + name: "Perception ECU" + type: "compute-unit" + parent_component_id: robot-alpha + +apps: + - id: lidar-driver + name: "LiDAR Driver" + category: "sensor" + is_located_on: perception-ecu + ros_binding: + node_name: lidar_driver + namespace: /perception + + - id: camera-driver + name: "Camera Driver" + category: "sensor" + is_located_on: perception-ecu + ros_binding: + node_name: camera_driver + namespace: /perception + + - id: point-cloud-filter + name: "Point Cloud Filter" + category: "processing" + is_located_on: perception-ecu + ros_binding: + node_name: point_cloud_filter + namespace: /perception + + - id: object-detector + name: "Object Detector" + category: "processing" + is_located_on: perception-ecu + ros_binding: + node_name: object_detector + namespace: /perception + + # Infrastructure apps (gateway, fault manager, diagnostic bridge) + - id: medkit-gateway + name: "Medkit Gateway" + category: "infrastructure" + is_located_on: perception-ecu + ros_binding: + node_name: ros2_medkit_gateway + namespace: /diagnostics + + - id: medkit-fault-manager + name: "Fault Manager" + category: "infrastructure" + is_located_on: perception-ecu + ros_binding: + node_name: fault_manager + namespace: / + + - id: medkit-diagnostic-bridge + name: "Diagnostic Bridge" + category: "infrastructure" + is_located_on: perception-ecu + ros_binding: + node_name: diagnostic_bridge + namespace: /bridge + +functions: + - id: autonomous-navigation + name: "Autonomous Navigation" + category: "navigation" + hosted_by: [object-detector, point-cloud-filter] + + - id: obstacle-avoidance + name: "Obstacle Avoidance" + category: "safety" + hosted_by: [lidar-driver, object-detector] diff --git a/demos/multi_ecu_aggregation/config/perception_params.yaml b/demos/multi_ecu_aggregation/config/perception_params.yaml new file mode 100644 index 0000000..ce9b520 --- /dev/null +++ b/demos/multi_ecu_aggregation/config/perception_params.yaml @@ -0,0 +1,80 @@ +# ros2_medkit gateway configuration for Perception ECU +# This ECU acts as the primary aggregator, pulling data from planning and actuation ECUs + +# Gateway runs in /diagnostics namespace +diagnostics: + ros2_medkit_gateway: + ros__parameters: + server: + host: "0.0.0.0" + port: 8080 + + discovery: + mode: "hybrid" + manifest_path: "" # Set via launch argument + manifest_strict_validation: true + runtime: + create_synthetic_components: false + create_synthetic_areas: false + + aggregation: + enabled: true + timeout_ms: 3000 + announce: false + discover: true + mdns_service: "_medkit._tcp.local" + peer_urls: ["http://planning-ecu:8080"] + peer_names: ["planning-ecu"] + + cors: + allowed_origins: ["*"] + allowed_methods: ["GET", "PUT", "POST", "DELETE", "OPTIONS"] + + scripts: + scripts_dir: "/var/lib/ros2_medkit/scripts" + +# Perception node parameters (namespace: /perception) +perception: + lidar_driver: + ros__parameters: + scan_rate: 10.0 + range_min: 0.12 + range_max: 3.5 + angle_min: -3.14159 + angle_max: 3.14159 + num_readings: 360 + noise_stddev: 0.01 + failure_probability: 0.0 + inject_nan: false + drift_rate: 0.0 + + camera_driver: + ros__parameters: + rate: 30.0 + width: 640 + height: 480 + noise_level: 0.0 + failure_probability: 0.0 + inject_black_frames: false + brightness: 128 + + point_cloud_filter: + ros__parameters: + drop_rate: 0.0 + delay_ms: 0 + failure_probability: 0.0 + + object_detector: + ros__parameters: + false_positive_rate: 0.0 + miss_rate: 0.0 + failure_probability: 0.0 + detection_rate: 5.0 + +# Fault Manager configuration (runs in root namespace) +fault_manager: + ros__parameters: + storage_type: "sqlite" + database_path: "/var/lib/ros2_medkit/faults.db" + snapshots: + enabled: false diff --git a/demos/multi_ecu_aggregation/config/planning_manifest.yaml b/demos/multi_ecu_aggregation/config/planning_manifest.yaml new file mode 100644 index 0000000..2f4a5c2 --- /dev/null +++ b/demos/multi_ecu_aggregation/config/planning_manifest.yaml @@ -0,0 +1,83 @@ +# SOVD Manifest for Planning ECU +# Defines path planning, behavior, and task scheduling entities +manifest_version: "1.0" + +metadata: + name: "planning-ecu" + description: "Planning subsystem - path planning and behavior control" + version: "0.1.0" + +config: + unmanifested_nodes: ignore + inherit_runtime_resources: true + +components: + - id: robot-alpha + name: "Robot Alpha" + type: "mobile-robot" + + - id: planning-ecu + name: "Planning ECU" + type: "compute-unit" + parent_component_id: robot-alpha + +apps: + - id: path-planner + name: "Path Planner" + category: "planning" + is_located_on: planning-ecu + ros_binding: + node_name: path_planner + namespace: /planning + + - id: behavior-planner + name: "Behavior Planner" + category: "planning" + is_located_on: planning-ecu + ros_binding: + node_name: behavior_planner + namespace: /planning + + - id: task-scheduler + name: "Task Scheduler" + category: "planning" + is_located_on: planning-ecu + ros_binding: + node_name: task_scheduler + namespace: /planning + + # Infrastructure apps + - id: medkit-gateway + name: "Medkit Gateway" + category: "infrastructure" + is_located_on: planning-ecu + ros_binding: + node_name: ros2_medkit_gateway + namespace: /diagnostics + + - id: medkit-fault-manager + name: "Fault Manager" + category: "infrastructure" + is_located_on: planning-ecu + ros_binding: + node_name: fault_manager + namespace: / + + - id: medkit-diagnostic-bridge + name: "Diagnostic Bridge" + category: "infrastructure" + is_located_on: planning-ecu + ros_binding: + node_name: diagnostic_bridge + namespace: /bridge + +functions: + - id: autonomous-navigation + name: "Autonomous Navigation" + category: "navigation" + hosted_by: [path-planner, behavior-planner] + + - id: task-execution + name: "Task Execution" + category: "execution" + hosted_by: [task-scheduler, behavior-planner] diff --git a/demos/multi_ecu_aggregation/config/planning_params.yaml b/demos/multi_ecu_aggregation/config/planning_params.yaml new file mode 100644 index 0000000..5353ffd --- /dev/null +++ b/demos/multi_ecu_aggregation/config/planning_params.yaml @@ -0,0 +1,56 @@ +# ros2_medkit gateway configuration for Planning ECU +# No aggregation - this ECU is a peer, not an aggregator + +# Gateway runs in /diagnostics namespace +diagnostics: + ros2_medkit_gateway: + ros__parameters: + server: + host: "0.0.0.0" + port: 8080 + + discovery: + mode: "hybrid" + manifest_path: "" # Set via launch argument + manifest_strict_validation: true + runtime: + create_synthetic_components: false + create_synthetic_areas: false + + aggregation: + enabled: false + + cors: + allowed_origins: ["*"] + allowed_methods: ["GET", "PUT", "POST", "DELETE", "OPTIONS"] + + scripts: + scripts_dir: "/var/lib/ros2_medkit/scripts" + +# Planning node parameters (namespace: /planning) +planning: + path_planner: + ros__parameters: + planning_delay_ms: 0 + failure_probability: 0.0 + planning_rate: 5.0 + + behavior_planner: + ros__parameters: + inject_wrong_direction: false + failure_probability: 0.0 + command_rate: 10.0 + + task_scheduler: + ros__parameters: + inject_stuck: false + failure_probability: 0.0 + schedule_rate: 1.0 + +# Fault Manager configuration (runs in root namespace) +fault_manager: + ros__parameters: + storage_type: "sqlite" + database_path: "/var/lib/ros2_medkit/faults.db" + snapshots: + enabled: false diff --git a/demos/multi_ecu_aggregation/container_scripts/actuation-ecu/inject-gripper-jam/metadata.json b/demos/multi_ecu_aggregation/container_scripts/actuation-ecu/inject-gripper-jam/metadata.json new file mode 100644 index 0000000..7824fbe --- /dev/null +++ b/demos/multi_ecu_aggregation/container_scripts/actuation-ecu/inject-gripper-jam/metadata.json @@ -0,0 +1,5 @@ +{ + "name": "inject-gripper-jam", + "description": "Inject gripper jam (gripper controller stuck)", + "format": "bash" +} diff --git a/demos/multi_ecu_aggregation/container_scripts/actuation-ecu/inject-gripper-jam/script.bash b/demos/multi_ecu_aggregation/container_scripts/actuation-ecu/inject-gripper-jam/script.bash new file mode 100644 index 0000000..e0a2f64 --- /dev/null +++ b/demos/multi_ecu_aggregation/container_scripts/actuation-ecu/inject-gripper-jam/script.bash @@ -0,0 +1,12 @@ +#!/bin/bash +# Inject gripper jam - gripper controller stuck +set -eu + +# shellcheck source=/dev/null +source /opt/ros/jazzy/setup.bash +# shellcheck source=/dev/null +source /root/demo_ws/install/setup.bash + +ros2 param set /actuation/gripper_controller inject_jam true + +echo '{"status": "injected", "parameter": "inject_jam", "value": true}' diff --git a/demos/multi_ecu_aggregation/container_scripts/actuation-ecu/restore-normal/metadata.json b/demos/multi_ecu_aggregation/container_scripts/actuation-ecu/restore-normal/metadata.json new file mode 100644 index 0000000..8d5dde7 --- /dev/null +++ b/demos/multi_ecu_aggregation/container_scripts/actuation-ecu/restore-normal/metadata.json @@ -0,0 +1,5 @@ +{ + "name": "restore-normal", + "description": "Reset all actuation parameters to defaults and clear faults", + "format": "bash" +} diff --git a/demos/multi_ecu_aggregation/container_scripts/actuation-ecu/restore-normal/script.bash b/demos/multi_ecu_aggregation/container_scripts/actuation-ecu/restore-normal/script.bash new file mode 100644 index 0000000..bb583e7 --- /dev/null +++ b/demos/multi_ecu_aggregation/container_scripts/actuation-ecu/restore-normal/script.bash @@ -0,0 +1,38 @@ +#!/bin/bash +# Reset all actuation node parameters to defaults +set -eu + +# shellcheck source=/dev/null +source /opt/ros/jazzy/setup.bash +# shellcheck source=/dev/null +source /root/demo_ws/install/setup.bash + +ERRORS=0 + +# Motor controller +ros2 param set /actuation/motor_controller torque_noise 0.01 || ERRORS=$((ERRORS + 1)) +ros2 param set /actuation/motor_controller failure_probability 0.0 || ERRORS=$((ERRORS + 1)) + +# Joint driver +ros2 param set /actuation/joint_driver inject_overheat false || ERRORS=$((ERRORS + 1)) +ros2 param set /actuation/joint_driver drift_rate 0.0 || ERRORS=$((ERRORS + 1)) +ros2 param set /actuation/joint_driver failure_probability 0.0 || ERRORS=$((ERRORS + 1)) + +# Gripper controller +ros2 param set /actuation/gripper_controller inject_jam false || ERRORS=$((ERRORS + 1)) +ros2 param set /actuation/gripper_controller failure_probability 0.0 || ERRORS=$((ERRORS + 1)) + +if [ $ERRORS -gt 0 ]; then + echo "{\"status\": \"partial\", \"errors\": $ERRORS}" + exit 1 +fi + +# Clear faults +GATEWAY_URL="${GATEWAY_URL:-http://localhost:8080}" +API_BASE="${GATEWAY_URL}/api/v1" +echo "Clearing faults..." +curl -sf -X DELETE "${API_BASE}/faults" > /dev/null 2>&1 || true +sleep 2 +curl -sf -X DELETE "${API_BASE}/faults" > /dev/null 2>&1 || true + +echo '{"status": "restored", "ecu": "actuation"}' diff --git a/demos/multi_ecu_aggregation/container_scripts/perception-ecu/inject-sensor-failure/metadata.json b/demos/multi_ecu_aggregation/container_scripts/perception-ecu/inject-sensor-failure/metadata.json new file mode 100644 index 0000000..655884b --- /dev/null +++ b/demos/multi_ecu_aggregation/container_scripts/perception-ecu/inject-sensor-failure/metadata.json @@ -0,0 +1,5 @@ +{ + "name": "inject-sensor-failure", + "description": "Inject LiDAR sensor failure (high failure probability)", + "format": "bash" +} diff --git a/demos/multi_ecu_aggregation/container_scripts/perception-ecu/inject-sensor-failure/script.bash b/demos/multi_ecu_aggregation/container_scripts/perception-ecu/inject-sensor-failure/script.bash new file mode 100644 index 0000000..321148f --- /dev/null +++ b/demos/multi_ecu_aggregation/container_scripts/perception-ecu/inject-sensor-failure/script.bash @@ -0,0 +1,12 @@ +#!/bin/bash +# Inject LiDAR sensor failure - high failure probability +set -eu + +# shellcheck source=/dev/null +source /opt/ros/jazzy/setup.bash +# shellcheck source=/dev/null +source /root/demo_ws/install/setup.bash + +ros2 param set /perception/lidar_driver failure_probability 0.8 + +echo '{"status": "injected", "parameter": "failure_probability", "value": 0.8}' diff --git a/demos/multi_ecu_aggregation/container_scripts/perception-ecu/restore-normal/metadata.json b/demos/multi_ecu_aggregation/container_scripts/perception-ecu/restore-normal/metadata.json new file mode 100644 index 0000000..f8f3d0d --- /dev/null +++ b/demos/multi_ecu_aggregation/container_scripts/perception-ecu/restore-normal/metadata.json @@ -0,0 +1,5 @@ +{ + "name": "restore-normal", + "description": "Reset all perception parameters to defaults and clear faults", + "format": "bash" +} diff --git a/demos/multi_ecu_aggregation/container_scripts/perception-ecu/restore-normal/script.bash b/demos/multi_ecu_aggregation/container_scripts/perception-ecu/restore-normal/script.bash new file mode 100644 index 0000000..9f22b5b --- /dev/null +++ b/demos/multi_ecu_aggregation/container_scripts/perception-ecu/restore-normal/script.bash @@ -0,0 +1,46 @@ +#!/bin/bash +# Reset all perception node parameters to defaults +set -eu + +# shellcheck source=/dev/null +source /opt/ros/jazzy/setup.bash +# shellcheck source=/dev/null +source /root/demo_ws/install/setup.bash + +ERRORS=0 + +# LiDAR driver +ros2 param set /perception/lidar_driver failure_probability 0.0 || ERRORS=$((ERRORS + 1)) +ros2 param set /perception/lidar_driver inject_nan false || ERRORS=$((ERRORS + 1)) +ros2 param set /perception/lidar_driver noise_stddev 0.01 || ERRORS=$((ERRORS + 1)) +ros2 param set /perception/lidar_driver drift_rate 0.0 || ERRORS=$((ERRORS + 1)) + +# Camera driver +ros2 param set /perception/camera_driver failure_probability 0.0 || ERRORS=$((ERRORS + 1)) +ros2 param set /perception/camera_driver noise_level 0.0 || ERRORS=$((ERRORS + 1)) +ros2 param set /perception/camera_driver inject_black_frames false || ERRORS=$((ERRORS + 1)) + +# Point cloud filter +ros2 param set /perception/point_cloud_filter failure_probability 0.0 || ERRORS=$((ERRORS + 1)) +ros2 param set /perception/point_cloud_filter drop_rate 0.0 || ERRORS=$((ERRORS + 1)) +ros2 param set /perception/point_cloud_filter delay_ms 0 || ERRORS=$((ERRORS + 1)) + +# Object detector +ros2 param set /perception/object_detector failure_probability 0.0 || ERRORS=$((ERRORS + 1)) +ros2 param set /perception/object_detector false_positive_rate 0.0 || ERRORS=$((ERRORS + 1)) +ros2 param set /perception/object_detector miss_rate 0.0 || ERRORS=$((ERRORS + 1)) + +if [ $ERRORS -gt 0 ]; then + echo "{\"status\": \"partial\", \"errors\": $ERRORS}" + exit 1 +fi + +# Clear faults +GATEWAY_URL="${GATEWAY_URL:-http://localhost:8080}" +API_BASE="${GATEWAY_URL}/api/v1" +echo "Clearing faults..." +curl -sf -X DELETE "${API_BASE}/faults" > /dev/null 2>&1 || true +sleep 2 +curl -sf -X DELETE "${API_BASE}/faults" > /dev/null 2>&1 || true + +echo '{"status": "restored", "ecu": "perception"}' diff --git a/demos/multi_ecu_aggregation/container_scripts/planning-ecu/inject-planning-delay/metadata.json b/demos/multi_ecu_aggregation/container_scripts/planning-ecu/inject-planning-delay/metadata.json new file mode 100644 index 0000000..03f1127 --- /dev/null +++ b/demos/multi_ecu_aggregation/container_scripts/planning-ecu/inject-planning-delay/metadata.json @@ -0,0 +1,5 @@ +{ + "name": "inject-planning-delay", + "description": "Inject path planning delay (5000ms processing time)", + "format": "bash" +} diff --git a/demos/multi_ecu_aggregation/container_scripts/planning-ecu/inject-planning-delay/script.bash b/demos/multi_ecu_aggregation/container_scripts/planning-ecu/inject-planning-delay/script.bash new file mode 100644 index 0000000..d48225b --- /dev/null +++ b/demos/multi_ecu_aggregation/container_scripts/planning-ecu/inject-planning-delay/script.bash @@ -0,0 +1,12 @@ +#!/bin/bash +# Inject path planning delay - 5000ms processing time +set -eu + +# shellcheck source=/dev/null +source /opt/ros/jazzy/setup.bash +# shellcheck source=/dev/null +source /root/demo_ws/install/setup.bash + +ros2 param set /planning/path_planner planning_delay_ms 5000 + +echo '{"status": "injected", "parameter": "planning_delay_ms", "value": 5000}' diff --git a/demos/multi_ecu_aggregation/container_scripts/planning-ecu/restore-normal/metadata.json b/demos/multi_ecu_aggregation/container_scripts/planning-ecu/restore-normal/metadata.json new file mode 100644 index 0000000..1fd3e29 --- /dev/null +++ b/demos/multi_ecu_aggregation/container_scripts/planning-ecu/restore-normal/metadata.json @@ -0,0 +1,5 @@ +{ + "name": "restore-normal", + "description": "Reset all planning parameters to defaults and clear faults", + "format": "bash" +} diff --git a/demos/multi_ecu_aggregation/container_scripts/planning-ecu/restore-normal/script.bash b/demos/multi_ecu_aggregation/container_scripts/planning-ecu/restore-normal/script.bash new file mode 100644 index 0000000..68e4933 --- /dev/null +++ b/demos/multi_ecu_aggregation/container_scripts/planning-ecu/restore-normal/script.bash @@ -0,0 +1,37 @@ +#!/bin/bash +# Reset all planning node parameters to defaults +set -eu + +# shellcheck source=/dev/null +source /opt/ros/jazzy/setup.bash +# shellcheck source=/dev/null +source /root/demo_ws/install/setup.bash + +ERRORS=0 + +# Path planner +ros2 param set /planning/path_planner planning_delay_ms 0 || ERRORS=$((ERRORS + 1)) +ros2 param set /planning/path_planner failure_probability 0.0 || ERRORS=$((ERRORS + 1)) + +# Behavior planner +ros2 param set /planning/behavior_planner inject_wrong_direction false || ERRORS=$((ERRORS + 1)) +ros2 param set /planning/behavior_planner failure_probability 0.0 || ERRORS=$((ERRORS + 1)) + +# Task scheduler +ros2 param set /planning/task_scheduler inject_stuck false || ERRORS=$((ERRORS + 1)) +ros2 param set /planning/task_scheduler failure_probability 0.0 || ERRORS=$((ERRORS + 1)) + +if [ $ERRORS -gt 0 ]; then + echo "{\"status\": \"partial\", \"errors\": $ERRORS}" + exit 1 +fi + +# Clear faults +GATEWAY_URL="${GATEWAY_URL:-http://localhost:8080}" +API_BASE="${GATEWAY_URL}/api/v1" +echo "Clearing faults..." +curl -sf -X DELETE "${API_BASE}/faults" > /dev/null 2>&1 || true +sleep 2 +curl -sf -X DELETE "${API_BASE}/faults" > /dev/null 2>&1 || true + +echo '{"status": "restored", "ecu": "planning"}' diff --git a/demos/multi_ecu_aggregation/docker-compose.yml b/demos/multi_ecu_aggregation/docker-compose.yml new file mode 100644 index 0000000..4c69090 --- /dev/null +++ b/demos/multi_ecu_aggregation/docker-compose.yml @@ -0,0 +1,109 @@ +services: + # Perception ECU - cameras, lidar, object detection (domain 10) + perception-ecu: + build: + context: . + dockerfile: Dockerfile + image: multi-ecu-demo:local + container_name: perception_ecu + hostname: perception-ecu + ports: + - "8080:8080" + environment: + - ROS_DOMAIN_ID=10 + - ECU_LAUNCH=perception.launch.py + networks: [medkit-net] + stdin_open: true + tty: true + + # Planning ECU - path planning, behavior planning, task scheduling (domain 20) + planning-ecu: + build: + context: . + dockerfile: Dockerfile + image: multi-ecu-demo:local + container_name: planning_ecu + hostname: planning-ecu + environment: + - ROS_DOMAIN_ID=20 + - ECU_LAUNCH=planning.launch.py + networks: [medkit-net] + stdin_open: true + tty: true + + # Actuation ECU - motor control, joint drivers, gripper (domain 30) + actuation-ecu: + build: + context: . + dockerfile: Dockerfile + image: multi-ecu-demo:local + container_name: actuation_ecu + hostname: actuation-ecu + environment: + - ROS_DOMAIN_ID=30 + - ECU_LAUNCH=actuation.launch.py + networks: [medkit-net] + stdin_open: true + tty: true + + # Web UI for visualization (connects to perception ECU gateway) + medkit-web-ui: + image: ghcr.io/selfpatch/ros2_medkit_web_ui:latest + container_name: multi_ecu_web_ui + ports: + - "3000:80" + depends_on: + - perception-ecu + networks: [medkit-net] + + # --- CI services (headless, no web UI) --- + # NOTE: CI and default profiles are mutually exclusive - do not start both + # simultaneously. CI services reuse hostnames for config compatibility. + + # Perception ECU for CI testing + perception-ecu-ci: + profiles: ["ci"] + build: + context: . + dockerfile: Dockerfile + image: multi-ecu-demo:local + container_name: perception_ecu_ci + hostname: perception-ecu + ports: + - "8080:8080" + environment: + - ROS_DOMAIN_ID=10 + - ECU_LAUNCH=perception.launch.py + networks: [medkit-net] + + # Planning ECU for CI testing + planning-ecu-ci: + profiles: ["ci"] + build: + context: . + dockerfile: Dockerfile + image: multi-ecu-demo:local + container_name: planning_ecu_ci + hostname: planning-ecu + environment: + - ROS_DOMAIN_ID=20 + - ECU_LAUNCH=planning.launch.py + networks: [medkit-net] + + # Actuation ECU for CI testing + actuation-ecu-ci: + profiles: ["ci"] + build: + context: . + dockerfile: Dockerfile + image: multi-ecu-demo:local + container_name: actuation_ecu_ci + hostname: actuation-ecu + environment: + - ROS_DOMAIN_ID=30 + - ECU_LAUNCH=actuation.launch.py + networks: [medkit-net] + +networks: + medkit-net: + driver: bridge diff --git a/demos/multi_ecu_aggregation/inject-cascade-failure.sh b/demos/multi_ecu_aggregation/inject-cascade-failure.sh new file mode 100755 index 0000000..32299cb --- /dev/null +++ b/demos/multi_ecu_aggregation/inject-cascade-failure.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Inject cascade failure across all ECUs +# Injects: lidar-driver failure_probability=0.8 + path-planner planning_delay_ms=5000 + gripper-controller inject_jam=true +# Affects: all functions degraded +set -u +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +# shellcheck disable=SC1091 +source "${SCRIPT_DIR}/../../lib/scripts-api.sh" + +echo "Injecting cascade failure across all ECUs..." + +ERRORS=0 + +# Perception ECU - LiDAR sensor failure +execute_script "components" "perception-ecu" "inject-sensor-failure" "Inject LiDAR sensor failure (Perception ECU)" || ERRORS=$((ERRORS + 1)) + +# Planning ECU - planning delay +execute_script "components" "planning-ecu" "inject-planning-delay" "Inject planning delay (Planning ECU)" || ERRORS=$((ERRORS + 1)) + +# Actuation ECU - gripper jam +execute_script "components" "actuation-ecu" "inject-gripper-jam" "Inject gripper jam (Actuation ECU)" || ERRORS=$((ERRORS + 1)) + +echo "" +if [ $ERRORS -gt 0 ]; then + echo "Warning: $ERRORS ECU(s) failed injection" + exit 1 +fi +echo "Cascade failure injected across all ECUs" +echo "Check: curl http://localhost:8080/api/v1/functions | jq" diff --git a/demos/multi_ecu_aggregation/inject-gripper-jam.sh b/demos/multi_ecu_aggregation/inject-gripper-jam.sh new file mode 100755 index 0000000..03f6030 --- /dev/null +++ b/demos/multi_ecu_aggregation/inject-gripper-jam.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# Inject gripper jam on Actuation ECU +# Sets gripper-controller inject_jam=true +# Affects: obstacle-avoidance, task-execution +set -eu +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +# shellcheck disable=SC1091 +source "${SCRIPT_DIR}/../../lib/scripts-api.sh" + +GATEWAY_URL="${GATEWAY_URL:-http://localhost:8080}" +if ! curl -sf "${GATEWAY_URL}/api/v1/health" > /dev/null 2>&1; then + echo "Error: Gateway not reachable at ${GATEWAY_URL}. Is the demo running?" + exit 1 +fi + +execute_script "components" "actuation-ecu" "inject-gripper-jam" "Inject gripper jam" diff --git a/demos/multi_ecu_aggregation/inject-planning-delay.sh b/demos/multi_ecu_aggregation/inject-planning-delay.sh new file mode 100755 index 0000000..48dd819 --- /dev/null +++ b/demos/multi_ecu_aggregation/inject-planning-delay.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# Inject path planning delay on Planning ECU +# Sets path-planner planning_delay_ms=5000 +# Affects: autonomous-navigation +set -eu +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +# shellcheck disable=SC1091 +source "${SCRIPT_DIR}/../../lib/scripts-api.sh" + +GATEWAY_URL="${GATEWAY_URL:-http://localhost:8080}" +if ! curl -sf "${GATEWAY_URL}/api/v1/health" > /dev/null 2>&1; then + echo "Error: Gateway not reachable at ${GATEWAY_URL}. Is the demo running?" + exit 1 +fi + +execute_script "components" "planning-ecu" "inject-planning-delay" "Inject planning delay" diff --git a/demos/multi_ecu_aggregation/inject-sensor-failure.sh b/demos/multi_ecu_aggregation/inject-sensor-failure.sh new file mode 100755 index 0000000..61e4fd5 --- /dev/null +++ b/demos/multi_ecu_aggregation/inject-sensor-failure.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# Inject LiDAR sensor failure on Perception ECU +# Affects: autonomous-navigation, obstacle-avoidance +set -eu +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +# shellcheck disable=SC1091 +source "${SCRIPT_DIR}/../../lib/scripts-api.sh" + +GATEWAY_URL="${GATEWAY_URL:-http://localhost:8080}" +if ! curl -sf "${GATEWAY_URL}/api/v1/health" > /dev/null 2>&1; then + echo "Error: Gateway not reachable at ${GATEWAY_URL}. Is the demo running?" + exit 1 +fi + +execute_script "components" "perception-ecu" "inject-sensor-failure" "Inject LiDAR sensor failure" diff --git a/demos/multi_ecu_aggregation/launch/actuation.launch.py b/demos/multi_ecu_aggregation/launch/actuation.launch.py new file mode 100644 index 0000000..d5da54f --- /dev/null +++ b/demos/multi_ecu_aggregation/launch/actuation.launch.py @@ -0,0 +1,133 @@ +# Copyright 2026 selfpatch +# Licensed under the Apache License, Version 2.0 + +"""Launch Actuation ECU nodes with ros2_medkit gateway (mDNS announce enabled). + +Actuation ECU runs as a standalone peer with mDNS announcement so the +perception aggregator can discover it automatically. A domain_bridge node +bridges planning commands from domain 20 to domain 30. + +Nodes launched: + /actuation/motor_controller - Motor torque control + /actuation/joint_driver - Joint position driver + /actuation/gripper_controller - Gripper open/close control + /diagnostics/ros2_medkit_gateway - SOVD gateway (mDNS announce enabled) + /fault_manager - Fault aggregation and storage + /bridge/diagnostic_bridge - Bridges /diagnostics topic to fault_manager + actuation_bridge - Domain bridge for cross-ECU topic relay +""" + +import os + +from ament_index_python.packages import get_package_prefix +from ament_index_python.packages import get_package_share_directory +from ament_index_python.packages import PackageNotFoundError +from launch import LaunchDescription +from launch_ros.actions import Node + + +def _resolve_plugin_path(package_name, lib_name): + """Resolve a gateway plugin .so path, returning empty string if not found.""" + try: + prefix = get_package_prefix(package_name) + path = os.path.join(prefix, 'lib', package_name, f'lib{lib_name}.so') + if os.path.isfile(path): + return path + except PackageNotFoundError: + pass + return '' + + +def generate_launch_description(): + pkg_dir = get_package_share_directory('multi_ecu_demo') + + # Config file paths + params_file = os.path.join(pkg_dir, 'config', 'actuation_params.yaml') + manifest_file = os.path.join( + pkg_dir, 'config', 'actuation_manifest.yaml') + bridge_config_path = os.path.join( + pkg_dir, 'config', 'domain_bridge', 'actuation_bridge.yaml') + + # Resolve optional plugin paths + graph_provider_path = _resolve_plugin_path( + 'ros2_medkit_graph_provider', 'ros2_medkit_graph_provider_plugin') + + plugin_overrides = {} + active_plugins = [] + if graph_provider_path: + active_plugins.append('graph_provider') + plugin_overrides['plugins.graph_provider.path'] = graph_provider_path + plugin_overrides['plugins'] = active_plugins + + return LaunchDescription([ + # ===== Actuation Nodes (under /actuation namespace) ===== + Node( + package='multi_ecu_demo', + executable='motor_controller', + name='motor_controller', + namespace='actuation', + output='screen', + parameters=[params_file], + ), + Node( + package='multi_ecu_demo', + executable='joint_driver', + name='joint_driver', + namespace='actuation', + output='screen', + parameters=[params_file], + ), + Node( + package='multi_ecu_demo', + executable='gripper_controller', + name='gripper_controller', + namespace='actuation', + output='screen', + parameters=[params_file], + ), + + # ===== Domain Bridge (cross-ECU topic relay) ===== + Node( + package='domain_bridge', + executable='domain_bridge', + name='actuation_bridge', + arguments=[bridge_config_path], + ), + + # ===== Diagnostic Bridge (Legacy path) ===== + Node( + package='ros2_medkit_diagnostic_bridge', + executable='diagnostic_bridge_node', + name='diagnostic_bridge', + namespace='bridge', + output='screen', + parameters=[{ + 'diagnostics_topic': '/diagnostics', + 'auto_generate_codes': True, + }], + ), + + # ===== ros2_medkit Gateway (mDNS announce enabled) ===== + Node( + package='ros2_medkit_gateway', + executable='gateway_node', + name='ros2_medkit_gateway', + namespace='diagnostics', + output='screen', + parameters=[ + params_file, + {'discovery.manifest_path': manifest_file}, + plugin_overrides, + ], + ), + + # ===== Fault Manager (root namespace) ===== + Node( + package='ros2_medkit_fault_manager', + executable='fault_manager_node', + name='fault_manager', + namespace='', + output='screen', + parameters=[params_file], + ), + ]) diff --git a/demos/multi_ecu_aggregation/launch/perception.launch.py b/demos/multi_ecu_aggregation/launch/perception.launch.py new file mode 100644 index 0000000..7836509 --- /dev/null +++ b/demos/multi_ecu_aggregation/launch/perception.launch.py @@ -0,0 +1,130 @@ +# Copyright 2026 selfpatch +# Licensed under the Apache License, Version 2.0 + +"""Launch Perception ECU nodes with ros2_medkit gateway (aggregation ON). + +Perception ECU is the primary aggregator - it pulls diagnostics data from +planning and actuation ECUs via the gateway aggregation feature. + +Nodes launched: + /perception/lidar_driver - LiDAR sensor driver + /perception/camera_driver - Camera sensor driver + /perception/point_cloud_filter - Point cloud preprocessing + /perception/object_detector - Object detection pipeline + /diagnostics/ros2_medkit_gateway - SOVD gateway (aggregation enabled) + /fault_manager - Fault aggregation and storage + /bridge/diagnostic_bridge - Bridges /diagnostics topic to fault_manager +""" + +import os + +from ament_index_python.packages import get_package_prefix +from ament_index_python.packages import get_package_share_directory +from ament_index_python.packages import PackageNotFoundError +from launch import LaunchDescription +from launch_ros.actions import Node + + +def _resolve_plugin_path(package_name, lib_name): + """Resolve a gateway plugin .so path, returning empty string if not found.""" + try: + prefix = get_package_prefix(package_name) + path = os.path.join(prefix, 'lib', package_name, f'lib{lib_name}.so') + if os.path.isfile(path): + return path + except PackageNotFoundError: + pass + return '' + + +def generate_launch_description(): + pkg_dir = get_package_share_directory('multi_ecu_demo') + + # Config file paths + params_file = os.path.join(pkg_dir, 'config', 'perception_params.yaml') + manifest_file = os.path.join( + pkg_dir, 'config', 'perception_manifest.yaml') + + # Resolve optional plugin paths + graph_provider_path = _resolve_plugin_path( + 'ros2_medkit_graph_provider', 'ros2_medkit_graph_provider_plugin') + + plugin_overrides = {} + active_plugins = [] + if graph_provider_path: + active_plugins.append('graph_provider') + plugin_overrides['plugins.graph_provider.path'] = graph_provider_path + plugin_overrides['plugins'] = active_plugins + + return LaunchDescription([ + # ===== Perception Nodes (under /perception namespace) ===== + Node( + package='multi_ecu_demo', + executable='lidar_driver', + name='lidar_driver', + namespace='perception', + output='screen', + parameters=[params_file], + ), + Node( + package='multi_ecu_demo', + executable='camera_driver', + name='camera_driver', + namespace='perception', + output='screen', + parameters=[params_file], + ), + Node( + package='multi_ecu_demo', + executable='point_cloud_filter', + name='point_cloud_filter', + namespace='perception', + output='screen', + parameters=[params_file], + ), + Node( + package='multi_ecu_demo', + executable='object_detector', + name='object_detector', + namespace='perception', + output='screen', + parameters=[params_file], + ), + + # ===== Diagnostic Bridge (Legacy path) ===== + Node( + package='ros2_medkit_diagnostic_bridge', + executable='diagnostic_bridge_node', + name='diagnostic_bridge', + namespace='bridge', + output='screen', + parameters=[{ + 'diagnostics_topic': '/diagnostics', + 'auto_generate_codes': True, + }], + ), + + # ===== ros2_medkit Gateway (aggregation enabled) ===== + Node( + package='ros2_medkit_gateway', + executable='gateway_node', + name='ros2_medkit_gateway', + namespace='diagnostics', + output='screen', + parameters=[ + params_file, + {'discovery.manifest_path': manifest_file}, + plugin_overrides, + ], + ), + + # ===== Fault Manager (root namespace) ===== + Node( + package='ros2_medkit_fault_manager', + executable='fault_manager_node', + name='fault_manager', + namespace='', + output='screen', + parameters=[params_file], + ), + ]) diff --git a/demos/multi_ecu_aggregation/launch/planning.launch.py b/demos/multi_ecu_aggregation/launch/planning.launch.py new file mode 100644 index 0000000..5761695 --- /dev/null +++ b/demos/multi_ecu_aggregation/launch/planning.launch.py @@ -0,0 +1,132 @@ +# Copyright 2026 selfpatch +# Licensed under the Apache License, Version 2.0 + +"""Launch Planning ECU nodes with ros2_medkit gateway (no aggregation). + +Planning ECU runs as a standalone peer - it does not aggregate other ECUs. +A domain_bridge node bridges perception detections from domain 10 to domain 20. + +Nodes launched: + /planning/path_planner - Path planning from detection data + /planning/behavior_planner - Behavior decision-making + /planning/task_scheduler - Task queue management + /diagnostics/ros2_medkit_gateway - SOVD gateway (aggregation disabled) + /fault_manager - Fault aggregation and storage + /bridge/diagnostic_bridge - Bridges /diagnostics topic to fault_manager + planning_bridge - Domain bridge for cross-ECU topic relay +""" + +import os + +from ament_index_python.packages import get_package_prefix +from ament_index_python.packages import get_package_share_directory +from ament_index_python.packages import PackageNotFoundError +from launch import LaunchDescription +from launch_ros.actions import Node + + +def _resolve_plugin_path(package_name, lib_name): + """Resolve a gateway plugin .so path, returning empty string if not found.""" + try: + prefix = get_package_prefix(package_name) + path = os.path.join(prefix, 'lib', package_name, f'lib{lib_name}.so') + if os.path.isfile(path): + return path + except PackageNotFoundError: + pass + return '' + + +def generate_launch_description(): + pkg_dir = get_package_share_directory('multi_ecu_demo') + + # Config file paths + params_file = os.path.join(pkg_dir, 'config', 'planning_params.yaml') + manifest_file = os.path.join( + pkg_dir, 'config', 'planning_manifest.yaml') + bridge_config_path = os.path.join( + pkg_dir, 'config', 'domain_bridge', 'planning_bridge.yaml') + + # Resolve optional plugin paths + graph_provider_path = _resolve_plugin_path( + 'ros2_medkit_graph_provider', 'ros2_medkit_graph_provider_plugin') + + plugin_overrides = {} + active_plugins = [] + if graph_provider_path: + active_plugins.append('graph_provider') + plugin_overrides['plugins.graph_provider.path'] = graph_provider_path + plugin_overrides['plugins'] = active_plugins + + return LaunchDescription([ + # ===== Planning Nodes (under /planning namespace) ===== + Node( + package='multi_ecu_demo', + executable='path_planner', + name='path_planner', + namespace='planning', + output='screen', + parameters=[params_file], + ), + Node( + package='multi_ecu_demo', + executable='behavior_planner', + name='behavior_planner', + namespace='planning', + output='screen', + parameters=[params_file], + ), + Node( + package='multi_ecu_demo', + executable='task_scheduler', + name='task_scheduler', + namespace='planning', + output='screen', + parameters=[params_file], + ), + + # ===== Domain Bridge (cross-ECU topic relay) ===== + Node( + package='domain_bridge', + executable='domain_bridge', + name='planning_bridge', + arguments=[bridge_config_path], + ), + + # ===== Diagnostic Bridge (Legacy path) ===== + Node( + package='ros2_medkit_diagnostic_bridge', + executable='diagnostic_bridge_node', + name='diagnostic_bridge', + namespace='bridge', + output='screen', + parameters=[{ + 'diagnostics_topic': '/diagnostics', + 'auto_generate_codes': True, + }], + ), + + # ===== ros2_medkit Gateway (no aggregation) ===== + Node( + package='ros2_medkit_gateway', + executable='gateway_node', + name='ros2_medkit_gateway', + namespace='diagnostics', + output='screen', + parameters=[ + params_file, + {'discovery.manifest_path': manifest_file}, + plugin_overrides, + ], + ), + + # ===== Fault Manager (root namespace) ===== + Node( + package='ros2_medkit_fault_manager', + executable='fault_manager_node', + name='fault_manager', + namespace='', + output='screen', + parameters=[params_file], + ), + ]) diff --git a/demos/multi_ecu_aggregation/package.xml b/demos/multi_ecu_aggregation/package.xml new file mode 100644 index 0000000..3f0b869 --- /dev/null +++ b/demos/multi_ecu_aggregation/package.xml @@ -0,0 +1,34 @@ + + + + multi_ecu_demo + 0.1.0 + Multi-ECU aggregation demo for ros2_medkit - 3 ECUs with peer discovery + bburda + Apache-2.0 + + ament_cmake + + rclcpp + std_msgs + sensor_msgs + geometry_msgs + nav_msgs + vision_msgs + diagnostic_msgs + rcl_interfaces + ros2_medkit_msgs + + ros2launch + ros2_medkit_gateway + ros2_medkit_fault_manager + ros2_medkit_diagnostic_bridge + domain_bridge + + ament_lint_auto + ament_lint_common + + + ament_cmake + + diff --git a/demos/multi_ecu_aggregation/restore-normal.sh b/demos/multi_ecu_aggregation/restore-normal.sh new file mode 100755 index 0000000..672ceec --- /dev/null +++ b/demos/multi_ecu_aggregation/restore-normal.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# Restore normal operation across all ECUs +# Resets all fault injection parameters and clears faults +set -eu +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +# shellcheck disable=SC1091 +source "${SCRIPT_DIR}/../../lib/scripts-api.sh" + +GATEWAY_URL="${GATEWAY_URL:-http://localhost:8080}" +if ! curl -sf "${GATEWAY_URL}/api/v1/health" > /dev/null 2>&1; then + echo "Error: Gateway not reachable at ${GATEWAY_URL}. Is the demo running?" + exit 1 +fi + +echo "Restoring normal operation across all ECUs..." + +# Perception ECU +execute_script "components" "perception-ecu" "restore-normal" "Restore normal (Perception ECU)" + +# Planning ECU +execute_script "components" "planning-ecu" "restore-normal" "Restore normal (Planning ECU)" + +# Actuation ECU +execute_script "components" "actuation-ecu" "restore-normal" "Restore normal (Actuation ECU)" + +echo "" +echo "All ECUs restored to normal operation" diff --git a/demos/multi_ecu_aggregation/run-demo.sh b/demos/multi_ecu_aggregation/run-demo.sh new file mode 100755 index 0000000..5101690 --- /dev/null +++ b/demos/multi_ecu_aggregation/run-demo.sh @@ -0,0 +1,130 @@ +#!/bin/bash +# Multi-ECU Aggregation Demo Runner +# Starts Docker services with 3 ECUs (perception, planning, actuation) and web UI + +set -eu + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +# Parse arguments +DETACH_MODE="true" +UPDATE_IMAGES="false" +BUILD_ARGS="" + +usage() { + echo "Usage: $0 [OPTIONS]" + echo "" + echo "Options:" + echo " --attached Run in foreground (default: daemon mode)" + echo " --update Pull latest images before running" + echo " --no-cache Build Docker images without cache" + echo " -h, --help Show this help message" + echo "" + echo "Examples:" + echo " $0 # Daemon mode (default)" + echo " $0 --attached # Foreground with logs" + echo " $0 --update # Pull and run latest version" +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --attached) + echo "Running in foreground mode" + DETACH_MODE="false" + ;; + --update) + echo "Will pull latest images" + UPDATE_IMAGES="true" + ;; + --no-cache) + echo "Building without cache" + BUILD_ARGS="--no-cache" + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown option: $1" + usage + exit 1 + ;; + esac + shift +done + +echo "Multi-ECU Aggregation Demo with ros2_medkit" +echo "============================================" +echo " 3 ECUs: Perception | Planning | Actuation" +echo "" + +# Check for Docker +if ! command -v docker &> /dev/null; then + echo "Error: Docker is not installed" + exit 1 +fi + +echo "Run mode: $([ "$DETACH_MODE" = "true" ] && echo "daemon (background)" || echo "attached (foreground)")" +echo "" + +# Detect docker compose command +if docker compose version &> /dev/null; then + COMPOSE_CMD="docker compose" +else + COMPOSE_CMD="docker-compose" +fi + +# Pull images if --update flag is set +if [[ "$UPDATE_IMAGES" == "true" ]]; then + echo "Pulling latest images..." + # shellcheck disable=SC2086 + ${COMPOSE_CMD} pull + echo "" +fi + +# Build and start services +echo "Building and starting demo..." +echo " (First run may take several minutes)" +echo "" +echo "Gateway (Perception ECU): http://localhost:8080/api/v1/" +echo "Web UI: http://localhost:3000/" +echo "" + +DETACH_FLAG="" +if [[ "$DETACH_MODE" == "true" ]]; then + DETACH_FLAG="-d" +fi + +# shellcheck disable=SC2086 +if ! ${COMPOSE_CMD} build ${BUILD_ARGS}; then + echo "" + echo "Docker build failed! Stopping any partially created containers..." + # shellcheck disable=SC2086 + ${COMPOSE_CMD} down 2>/dev/null || true + exit 1 +fi + +# shellcheck disable=SC2086 +${COMPOSE_CMD} up ${DETACH_FLAG} + +if [[ "$DETACH_MODE" == "true" ]]; then + echo "" + echo "Demo started in background!" + echo "" + echo "To view logs:" + echo " docker compose logs -f" + echo " docker compose logs -f perception-ecu # Single ECU" + echo "" + echo "Inject faults:" + echo " ./inject-sensor-failure.sh # LiDAR sensor failure on Perception ECU" + echo " ./inject-planning-delay.sh # Path planning delay on Planning ECU" + echo " ./inject-gripper-jam.sh # Gripper jam on Actuation ECU" + echo " ./inject-cascade-failure.sh # Cascade failure across all ECUs" + echo " ./restore-normal.sh # Restore normal operation" + echo "" + echo "Web UI: http://localhost:3000" + echo "REST API: http://localhost:8080/api/v1/" + echo "" + echo "To stop: ./stop-demo.sh" +fi diff --git a/demos/multi_ecu_aggregation/src/actuation/gripper_controller.cpp b/demos/multi_ecu_aggregation/src/actuation/gripper_controller.cpp new file mode 100644 index 0000000..4f9d3d4 --- /dev/null +++ b/demos/multi_ecu_aggregation/src/actuation/gripper_controller.cpp @@ -0,0 +1,267 @@ +// Copyright 2026 selfpatch +// SPDX-License-Identifier: Apache-2.0 + +/// @file gripper_controller.cpp +/// @brief Gripper controller node for Actuation ECU +/// +/// Subscribes to /planning/commands (Twist), maps linear.z to gripper +/// open/close motion. Tracks gripper position [0.0 = closed, 1.0 = open] +/// and publishes gripper_state (JointState). Supports jam fault injection. + +#include +#include +#include +#include +#include +#include +#include + +#include "diagnostic_msgs/msg/diagnostic_array.hpp" +#include "diagnostic_msgs/msg/diagnostic_status.hpp" +#include "diagnostic_msgs/msg/key_value.hpp" +#include "geometry_msgs/msg/twist.hpp" +#include "rclcpp/rclcpp.hpp" +#include "rcl_interfaces/msg/set_parameters_result.hpp" +#include "sensor_msgs/msg/joint_state.hpp" + +namespace multi_ecu_demo +{ + +class GripperControllerNode : public rclcpp::Node +{ +public: + GripperControllerNode() + : Node("gripper_controller"), + rng_(std::random_device{}()), + uniform_dist_(0.0, 1.0), + last_update_time_(this->now()) + { + // Declare parameters + this->declare_parameter("inject_jam", false); + this->declare_parameter("failure_probability", 0.0); + this->declare_parameter("gripper_rate", 10.0); // Hz + + load_parameters(); + + // Create publishers + gripper_pub_ = this->create_publisher("gripper_state", 10); + diag_pub_ = this->create_publisher( + "/diagnostics", 10); + + // Subscribe to planning commands (absolute topic - bridged from domain 20) + cmd_sub_ = this->create_subscription( + "/planning/commands", 10, + std::bind(&GripperControllerNode::on_command, this, std::placeholders::_1)); + + // Create timer (with rate validation) + double rate = gripper_rate_; + if (rate <= 0.0) { + RCLCPP_WARN( + this->get_logger(), + "Parameter 'gripper_rate' must be positive; using default 10.0 Hz instead of %.3f", + rate); + rate = 10.0; + gripper_rate_ = rate; + } + auto period = std::chrono::duration(1.0 / rate); + timer_ = this->create_wall_timer( + std::chrono::duration_cast(period), + std::bind(&GripperControllerNode::publish_gripper_state, this)); + + // Register parameter callback + param_callback_handle_ = this->add_on_set_parameters_callback( + std::bind(&GripperControllerNode::on_parameter_change, this, std::placeholders::_1)); + + RCLCPP_INFO(this->get_logger(), "Gripper controller started at %.1f Hz", rate); + } + +private: + void load_parameters() + { + inject_jam_ = this->get_parameter("inject_jam").as_bool(); + failure_probability_ = this->get_parameter("failure_probability").as_double(); + gripper_rate_ = this->get_parameter("gripper_rate").as_double(); + } + + rcl_interfaces::msg::SetParametersResult on_parameter_change( + const std::vector & parameters) + { + rcl_interfaces::msg::SetParametersResult result; + result.successful = true; + + for (const auto & param : parameters) { + if (param.get_name() == "inject_jam") { + inject_jam_ = param.as_bool(); + RCLCPP_INFO( + this->get_logger(), "Jam injection %s", + inject_jam_ ? "enabled" : "disabled"); + } else if (param.get_name() == "failure_probability") { + failure_probability_ = param.as_double(); + RCLCPP_INFO( + this->get_logger(), "Failure probability changed to %.2f", + failure_probability_); + } else if (param.get_name() == "gripper_rate") { + double rate = param.as_double(); + if (rate <= 0.0) { + result.successful = false; + result.reason = "gripper_rate must be positive"; + return result; + } + gripper_rate_ = rate; + auto period = std::chrono::duration(1.0 / rate); + timer_ = this->create_wall_timer( + std::chrono::duration_cast(period), + std::bind(&GripperControllerNode::publish_gripper_state, this)); + RCLCPP_INFO(this->get_logger(), "Gripper rate changed to %.1f Hz", gripper_rate_); + } + } + + return result; + } + + void on_command(const geometry_msgs::msg::Twist::SharedPtr msg) + { + std::lock_guard lock(cmd_mutex_); + latest_z_command_ = msg->linear.z; + has_cmd_ = true; + } + + void publish_gripper_state() + { + msg_count_++; + + // Check for complete failure + if (uniform_dist_(rng_) < failure_probability_) { + publish_diagnostics("GRIPPER_FAILURE", "Gripper failure (injected)"); + return; + } + + auto now = this->now(); + double dt = (now - last_update_time_).seconds(); + last_update_time_ = now; + + // Clamp dt to avoid huge jumps on first tick or after pause + if (dt > 1.0) { + dt = 1.0; + } + + // Get latest command + double z_cmd = 0.0; + { + std::lock_guard lock(cmd_mutex_); + z_cmd = latest_z_command_; + } + + // Update gripper position (unless jammed) + if (!inject_jam_) { + // Positive z = open, negative z = close + // Scale command to reasonable gripper speed + gripper_position_ += z_cmd * dt; + gripper_position_ = std::clamp(gripper_position_, 0.0, 1.0); + } + + auto msg = sensor_msgs::msg::JointState(); + msg.header.stamp = now; + msg.name = {"gripper_finger"}; + msg.position = {gripper_position_}; + msg.velocity = {inject_jam_ ? 0.0 : z_cmd}; + msg.effort = {0.0}; + + gripper_pub_->publish(msg); + + // Diagnostics + if (inject_jam_) { + publish_diagnostics( + "JAMMED", + "Gripper jammed at position " + std::to_string(gripper_position_)); + } else if (!has_cmd_) { + publish_diagnostics("NO_COMMAND", "No command received yet"); + } else { + publish_diagnostics("OK", "Operating normally"); + } + } + + void publish_diagnostics(const std::string & status, const std::string & message) + { + auto diag_array = diagnostic_msgs::msg::DiagnosticArray(); + diag_array.header.stamp = this->now(); + + auto diag = diagnostic_msgs::msg::DiagnosticStatus(); + diag.name = "gripper_controller"; + diag.hardware_id = "actuation_gripper"; + + if (status == "OK") { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::OK; + } else if (status == "NO_COMMAND") { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::WARN; + } else { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::ERROR; + } + + diag.message = message; + + diagnostic_msgs::msg::KeyValue kv; + kv.key = "status"; + kv.value = status; + diag.values.push_back(kv); + + kv.key = "msg_count"; + kv.value = std::to_string(msg_count_); + diag.values.push_back(kv); + + kv.key = "gripper_position"; + kv.value = std::to_string(gripper_position_); + diag.values.push_back(kv); + + kv.key = "inject_jam"; + kv.value = inject_jam_ ? "true" : "false"; + diag.values.push_back(kv); + + diag_array.status.push_back(diag); + diag_pub_->publish(diag_array); + } + + // Publishers + rclcpp::Publisher::SharedPtr gripper_pub_; + rclcpp::Publisher::SharedPtr diag_pub_; + + // Subscription + rclcpp::Subscription::SharedPtr cmd_sub_; + + // Timer + rclcpp::TimerBase::SharedPtr timer_; + + // Parameter callback + rclcpp::node_interfaces::OnSetParametersCallbackHandle::SharedPtr param_callback_handle_; + + // Random number generation + std::mt19937 rng_; + std::uniform_real_distribution uniform_dist_; + + // Command data (protected by mutex for thread safety) + std::mutex cmd_mutex_; + double latest_z_command_{0.0}; + bool has_cmd_{false}; + + // Gripper state + double gripper_position_{0.0}; // 0.0 = closed, 1.0 = open + rclcpp::Time last_update_time_; + + // Parameters + bool inject_jam_; + double failure_probability_; + double gripper_rate_; + + // Statistics + uint64_t msg_count_{0}; +}; + +} // namespace multi_ecu_demo + +int main(int argc, char ** argv) +{ + rclcpp::init(argc, argv); + rclcpp::spin(std::make_shared()); + rclcpp::shutdown(); + return 0; +} diff --git a/demos/multi_ecu_aggregation/src/actuation/joint_driver.cpp b/demos/multi_ecu_aggregation/src/actuation/joint_driver.cpp new file mode 100644 index 0000000..7283f64 --- /dev/null +++ b/demos/multi_ecu_aggregation/src/actuation/joint_driver.cpp @@ -0,0 +1,287 @@ +// Copyright 2026 selfpatch +// SPDX-License-Identifier: Apache-2.0 + +/// @file joint_driver.cpp +/// @brief Joint driver node for Actuation ECU +/// +/// Subscribes to motor_status (JointState), passes through velocities, +/// accumulates position from velocity * dt, and publishes joint_state. +/// Supports overheat injection (ERROR diagnostic) and position drift. + +#include +#include +#include +#include +#include +#include + +#include "diagnostic_msgs/msg/diagnostic_array.hpp" +#include "diagnostic_msgs/msg/diagnostic_status.hpp" +#include "diagnostic_msgs/msg/key_value.hpp" +#include "rclcpp/rclcpp.hpp" +#include "rcl_interfaces/msg/set_parameters_result.hpp" +#include "sensor_msgs/msg/joint_state.hpp" + +namespace multi_ecu_demo +{ + +class JointDriverNode : public rclcpp::Node +{ +public: + JointDriverNode() + : Node("joint_driver"), + rng_(std::random_device{}()), + uniform_dist_(0.0, 1.0), + last_update_time_(this->now()) + { + // Declare parameters + this->declare_parameter("inject_overheat", false); + this->declare_parameter("drift_rate", 0.0); + this->declare_parameter("failure_probability", 0.0); + this->declare_parameter("driver_rate", 50.0); // Hz + + load_parameters(); + + // Create publishers + joint_pub_ = this->create_publisher("joint_state", 10); + diag_pub_ = this->create_publisher( + "/diagnostics", 10); + + // Subscribe to motor_status (relative topic - within /actuation namespace) + motor_sub_ = this->create_subscription( + "motor_status", 10, + std::bind(&JointDriverNode::on_motor_status, this, std::placeholders::_1)); + + // Create timer (with rate validation) + double rate = driver_rate_; + if (rate <= 0.0) { + RCLCPP_WARN( + this->get_logger(), + "Parameter 'driver_rate' must be positive; using default 50.0 Hz instead of %.3f", + rate); + rate = 50.0; + driver_rate_ = rate; + } + auto period = std::chrono::duration(1.0 / rate); + timer_ = this->create_wall_timer( + std::chrono::duration_cast(period), + std::bind(&JointDriverNode::publish_joint_state, this)); + + // Register parameter callback + param_callback_handle_ = this->add_on_set_parameters_callback( + std::bind(&JointDriverNode::on_parameter_change, this, std::placeholders::_1)); + + // Initialize position tracking for 2 joints (left_wheel, right_wheel) + accumulated_position_ = {0.0, 0.0}; + latest_velocity_ = {0.0, 0.0}; + latest_effort_ = {0.0, 0.0}; + + RCLCPP_INFO(this->get_logger(), "Joint driver started at %.1f Hz", rate); + } + +private: + void load_parameters() + { + inject_overheat_ = this->get_parameter("inject_overheat").as_bool(); + drift_rate_ = this->get_parameter("drift_rate").as_double(); + failure_probability_ = this->get_parameter("failure_probability").as_double(); + driver_rate_ = this->get_parameter("driver_rate").as_double(); + } + + rcl_interfaces::msg::SetParametersResult on_parameter_change( + const std::vector & parameters) + { + rcl_interfaces::msg::SetParametersResult result; + result.successful = true; + + for (const auto & param : parameters) { + if (param.get_name() == "inject_overheat") { + inject_overheat_ = param.as_bool(); + RCLCPP_INFO( + this->get_logger(), "Overheat injection %s", + inject_overheat_ ? "enabled" : "disabled"); + } else if (param.get_name() == "drift_rate") { + drift_rate_ = param.as_double(); + RCLCPP_INFO(this->get_logger(), "Drift rate changed to %.4f", drift_rate_); + } else if (param.get_name() == "failure_probability") { + failure_probability_ = param.as_double(); + RCLCPP_INFO( + this->get_logger(), "Failure probability changed to %.2f", + failure_probability_); + } else if (param.get_name() == "driver_rate") { + double rate = param.as_double(); + if (rate <= 0.0) { + result.successful = false; + result.reason = "driver_rate must be positive"; + return result; + } + driver_rate_ = rate; + auto period = std::chrono::duration(1.0 / rate); + timer_ = this->create_wall_timer( + std::chrono::duration_cast(period), + std::bind(&JointDriverNode::publish_joint_state, this)); + RCLCPP_INFO(this->get_logger(), "Driver rate changed to %.1f Hz", driver_rate_); + } + } + + return result; + } + + void on_motor_status(const sensor_msgs::msg::JointState::SharedPtr msg) + { + std::lock_guard lock(motor_mutex_); + if (msg->velocity.size() >= 2) { + latest_velocity_ = {msg->velocity[0], msg->velocity[1]}; + } + if (msg->effort.size() >= 2) { + latest_effort_ = {msg->effort[0], msg->effort[1]}; + } + has_motor_data_ = true; + } + + void publish_joint_state() + { + msg_count_++; + + // Check for complete failure + if (uniform_dist_(rng_) < failure_probability_) { + publish_diagnostics("DRIVER_FAILURE", "Joint driver failure (injected)"); + return; + } + + auto now = this->now(); + double dt = (now - last_update_time_).seconds(); + last_update_time_ = now; + + // Clamp dt to avoid huge jumps on first tick or after pause + if (dt > 1.0) { + dt = 1.0; + } + + auto msg = sensor_msgs::msg::JointState(); + msg.header.stamp = now; + msg.name = {"left_wheel", "right_wheel"}; + + // Get latest motor data + std::vector velocity; + std::vector effort; + { + std::lock_guard lock(motor_mutex_); + velocity = latest_velocity_; + effort = latest_effort_; + } + + // Pass through velocities + msg.velocity = velocity; + msg.effort = effort; + + // Accumulate position from velocity * dt + drift + accumulated_drift_ += drift_rate_ * dt; + for (size_t i = 0; i < accumulated_position_.size(); ++i) { + accumulated_position_[i] += velocity[i] * dt + drift_rate_ * dt; + } + msg.position = accumulated_position_; + + joint_pub_->publish(msg); + + // Diagnostics + if (inject_overheat_) { + publish_diagnostics("OVERHEAT", "Joint driver overheat warning - temperature critical"); + } else if (std::abs(accumulated_drift_) > 0.1) { + publish_diagnostics( + "DRIFTING", + "Position drift: " + std::to_string(accumulated_drift_) + " rad"); + } else if (!has_motor_data_) { + publish_diagnostics("NO_MOTOR_DATA", "No motor status received yet"); + } else { + publish_diagnostics("OK", "Operating normally"); + } + } + + void publish_diagnostics(const std::string & status, const std::string & message) + { + auto diag_array = diagnostic_msgs::msg::DiagnosticArray(); + diag_array.header.stamp = this->now(); + + auto diag = diagnostic_msgs::msg::DiagnosticStatus(); + diag.name = "joint_driver"; + diag.hardware_id = "actuation_joints"; + + if (status == "OK") { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::OK; + } else if (status == "NO_MOTOR_DATA") { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::WARN; + } else { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::ERROR; + } + + diag.message = message; + + diagnostic_msgs::msg::KeyValue kv; + kv.key = "status"; + kv.value = status; + diag.values.push_back(kv); + + kv.key = "msg_count"; + kv.value = std::to_string(msg_count_); + diag.values.push_back(kv); + + kv.key = "accumulated_drift"; + kv.value = std::to_string(accumulated_drift_); + diag.values.push_back(kv); + + kv.key = "inject_overheat"; + kv.value = inject_overheat_ ? "true" : "false"; + diag.values.push_back(kv); + + diag_array.status.push_back(diag); + diag_pub_->publish(diag_array); + } + + // Publishers + rclcpp::Publisher::SharedPtr joint_pub_; + rclcpp::Publisher::SharedPtr diag_pub_; + + // Subscription + rclcpp::Subscription::SharedPtr motor_sub_; + + // Timer + rclcpp::TimerBase::SharedPtr timer_; + + // Parameter callback + rclcpp::node_interfaces::OnSetParametersCallbackHandle::SharedPtr param_callback_handle_; + + // Random number generation + std::mt19937 rng_; + std::uniform_real_distribution uniform_dist_; + + // Motor data (protected by mutex for thread safety) + std::mutex motor_mutex_; + std::vector latest_velocity_; + std::vector latest_effort_; + bool has_motor_data_{false}; + + // Position tracking + std::vector accumulated_position_; + double accumulated_drift_{0.0}; + rclcpp::Time last_update_time_; + + // Parameters + bool inject_overheat_; + double drift_rate_; + double failure_probability_; + double driver_rate_; + + // Statistics + uint64_t msg_count_{0}; +}; + +} // namespace multi_ecu_demo + +int main(int argc, char ** argv) +{ + rclcpp::init(argc, argv); + rclcpp::spin(std::make_shared()); + rclcpp::shutdown(); + return 0; +} diff --git a/demos/multi_ecu_aggregation/src/actuation/motor_controller.cpp b/demos/multi_ecu_aggregation/src/actuation/motor_controller.cpp new file mode 100644 index 0000000..f3b82dd --- /dev/null +++ b/demos/multi_ecu_aggregation/src/actuation/motor_controller.cpp @@ -0,0 +1,252 @@ +// Copyright 2026 selfpatch +// SPDX-License-Identifier: Apache-2.0 + +/// @file motor_controller.cpp +/// @brief Motor controller node for Actuation ECU +/// +/// Subscribes to /planning/commands (Twist), converts to differential drive +/// wheel velocities, and publishes motor_status (JointState) with configurable +/// torque noise and failure injection. + +#include +#include +#include +#include +#include + +#include "diagnostic_msgs/msg/diagnostic_array.hpp" +#include "diagnostic_msgs/msg/diagnostic_status.hpp" +#include "diagnostic_msgs/msg/key_value.hpp" +#include "geometry_msgs/msg/twist.hpp" +#include "rclcpp/rclcpp.hpp" +#include "rcl_interfaces/msg/set_parameters_result.hpp" +#include "sensor_msgs/msg/joint_state.hpp" + +namespace multi_ecu_demo +{ + +class MotorControllerNode : public rclcpp::Node +{ +public: + MotorControllerNode() + : Node("motor_controller"), + rng_(std::random_device{}()), + normal_dist_(0.0, 1.0), + uniform_dist_(0.0, 1.0) + { + // Declare parameters + this->declare_parameter("torque_noise", 0.01); + this->declare_parameter("failure_probability", 0.0); + this->declare_parameter("status_rate", 20.0); // Hz + + load_parameters(); + + // Create publishers + motor_pub_ = this->create_publisher("motor_status", 10); + diag_pub_ = this->create_publisher( + "/diagnostics", 10); + + // Subscribe to planning commands (absolute topic - bridged from domain 20) + cmd_sub_ = this->create_subscription( + "/planning/commands", 10, + std::bind(&MotorControllerNode::on_command, this, std::placeholders::_1)); + + // Create timer (with rate validation) + double rate = status_rate_; + if (rate <= 0.0) { + RCLCPP_WARN( + this->get_logger(), + "Parameter 'status_rate' must be positive; using default 20.0 Hz instead of %.3f", + rate); + rate = 20.0; + status_rate_ = rate; + } + auto period = std::chrono::duration(1.0 / rate); + timer_ = this->create_wall_timer( + std::chrono::duration_cast(period), + std::bind(&MotorControllerNode::publish_motor_status, this)); + + // Register parameter callback + param_callback_handle_ = this->add_on_set_parameters_callback( + std::bind(&MotorControllerNode::on_parameter_change, this, std::placeholders::_1)); + + RCLCPP_INFO(this->get_logger(), "Motor controller started at %.1f Hz", rate); + } + +private: + void load_parameters() + { + torque_noise_ = this->get_parameter("torque_noise").as_double(); + failure_probability_ = this->get_parameter("failure_probability").as_double(); + status_rate_ = this->get_parameter("status_rate").as_double(); + } + + rcl_interfaces::msg::SetParametersResult on_parameter_change( + const std::vector & parameters) + { + rcl_interfaces::msg::SetParametersResult result; + result.successful = true; + + for (const auto & param : parameters) { + if (param.get_name() == "torque_noise") { + torque_noise_ = param.as_double(); + RCLCPP_INFO(this->get_logger(), "Torque noise changed to %.4f", torque_noise_); + } else if (param.get_name() == "failure_probability") { + failure_probability_ = param.as_double(); + RCLCPP_INFO( + this->get_logger(), "Failure probability changed to %.2f", + failure_probability_); + } else if (param.get_name() == "status_rate") { + double rate = param.as_double(); + if (rate <= 0.0) { + result.successful = false; + result.reason = "status_rate must be positive"; + return result; + } + status_rate_ = rate; + auto period = std::chrono::duration(1.0 / rate); + timer_ = this->create_wall_timer( + std::chrono::duration_cast(period), + std::bind(&MotorControllerNode::publish_motor_status, this)); + RCLCPP_INFO(this->get_logger(), "Status rate changed to %.1f Hz", status_rate_); + } + } + + return result; + } + + void on_command(const geometry_msgs::msg::Twist::SharedPtr msg) + { + std::lock_guard lock(cmd_mutex_); + latest_cmd_ = *msg; + has_cmd_ = true; + } + + void publish_motor_status() + { + msg_count_++; + + // Check for complete failure + if (uniform_dist_(rng_) < failure_probability_) { + publish_diagnostics("MOTOR_FAILURE", "Motor failure (injected)"); + return; + } + + auto msg = sensor_msgs::msg::JointState(); + msg.header.stamp = this->now(); + msg.name = {"left_wheel", "right_wheel"}; + + // Get latest command + double linear_x = 0.0; + double angular_z = 0.0; + { + std::lock_guard lock(cmd_mutex_); + if (has_cmd_) { + linear_x = latest_cmd_.linear.x; + angular_z = latest_cmd_.angular.z; + } + } + + // Differential drive conversion + double left_vel = linear_x - angular_z; + double right_vel = linear_x + angular_z; + + msg.velocity = {left_vel, right_vel}; + + // Effort with Gaussian noise + double left_effort = left_vel + normal_dist_(rng_) * torque_noise_; + double right_effort = right_vel + normal_dist_(rng_) * torque_noise_; + msg.effort = {left_effort, right_effort}; + + // Position placeholder (not tracked here - joint_driver accumulates) + msg.position = {0.0, 0.0}; + + motor_pub_->publish(msg); + + // Diagnostics + if (!has_cmd_) { + publish_diagnostics("NO_COMMAND", "No command received yet"); + } else if (torque_noise_ > 0.1) { + publish_diagnostics("HIGH_NOISE", "High torque noise: " + std::to_string(torque_noise_)); + } else { + publish_diagnostics("OK", "Operating normally"); + } + } + + void publish_diagnostics(const std::string & status, const std::string & message) + { + auto diag_array = diagnostic_msgs::msg::DiagnosticArray(); + diag_array.header.stamp = this->now(); + + auto diag = diagnostic_msgs::msg::DiagnosticStatus(); + diag.name = "motor_controller"; + diag.hardware_id = "actuation_motors"; + + if (status == "OK") { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::OK; + } else if (status == "NO_COMMAND") { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::WARN; + } else { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::ERROR; + } + + diag.message = message; + + diagnostic_msgs::msg::KeyValue kv; + kv.key = "status"; + kv.value = status; + diag.values.push_back(kv); + + kv.key = "msg_count"; + kv.value = std::to_string(msg_count_); + diag.values.push_back(kv); + + kv.key = "torque_noise"; + kv.value = std::to_string(torque_noise_); + diag.values.push_back(kv); + + diag_array.status.push_back(diag); + diag_pub_->publish(diag_array); + } + + // Publishers + rclcpp::Publisher::SharedPtr motor_pub_; + rclcpp::Publisher::SharedPtr diag_pub_; + + // Subscription + rclcpp::Subscription::SharedPtr cmd_sub_; + + // Timer + rclcpp::TimerBase::SharedPtr timer_; + + // Parameter callback + rclcpp::node_interfaces::OnSetParametersCallbackHandle::SharedPtr param_callback_handle_; + + // Random number generation + std::mt19937 rng_; + std::normal_distribution normal_dist_; + std::uniform_real_distribution uniform_dist_; + + // Latest command (protected by mutex for thread safety) + std::mutex cmd_mutex_; + geometry_msgs::msg::Twist latest_cmd_; + bool has_cmd_{false}; + + // Parameters + double torque_noise_; + double failure_probability_; + double status_rate_; + + // Statistics + uint64_t msg_count_{0}; +}; + +} // namespace multi_ecu_demo + +int main(int argc, char ** argv) +{ + rclcpp::init(argc, argv); + rclcpp::spin(std::make_shared()); + rclcpp::shutdown(); + return 0; +} diff --git a/demos/multi_ecu_aggregation/src/perception/camera_driver.cpp b/demos/multi_ecu_aggregation/src/perception/camera_driver.cpp new file mode 100644 index 0000000..08c284d --- /dev/null +++ b/demos/multi_ecu_aggregation/src/perception/camera_driver.cpp @@ -0,0 +1,276 @@ +// Copyright 2026 selfpatch +// SPDX-License-Identifier: Apache-2.0 + +/// @file camera_driver.cpp +/// @brief Simulated RGB camera with configurable fault injection +/// +/// Publishes simulated Image messages with a gradient pattern, noise injection, +/// and black frame injection. Diagnostics are published to /diagnostics for the +/// legacy fault reporting path via ros2_medkit_diagnostic_bridge. + +#include +#include +#include +#include +#include +#include +#include + +#include "diagnostic_msgs/msg/diagnostic_array.hpp" +#include "diagnostic_msgs/msg/diagnostic_status.hpp" +#include "diagnostic_msgs/msg/key_value.hpp" +#include "rclcpp/rclcpp.hpp" +#include "rcl_interfaces/msg/set_parameters_result.hpp" +#include "sensor_msgs/msg/image.hpp" + +namespace multi_ecu_demo +{ + +class CameraDriver : public rclcpp::Node +{ +public: + CameraDriver() + : Node("camera_driver"), + rng_(std::random_device{}()), + uniform_dist_(0.0, 1.0) + { + // Declare parameters with defaults + this->declare_parameter("rate", 30.0); // Hz + this->declare_parameter("width", 640); // pixels + this->declare_parameter("height", 480); // pixels + this->declare_parameter("noise_level", 0.0); // 0.0 - 1.0 fraction of noisy pixels + this->declare_parameter("failure_probability", 0.0); // 0.0 - 1.0 + this->declare_parameter("inject_black_frames", false); + this->declare_parameter("brightness", 128); // 0-255 base brightness + + load_parameters(); + + // Create publishers + image_pub_ = this->create_publisher("image_raw", 10); + diag_pub_ = this->create_publisher( + "/diagnostics", 10); + + // Create timer (with rate validation) + double rate = this->get_parameter("rate").as_double(); + if (rate <= 0.0) { + RCLCPP_WARN( + this->get_logger(), + "Parameter 'rate' must be > 0.0 Hz, but got %.3f. Falling back to 30.0 Hz.", rate); + rate = 30.0; + this->set_parameters({rclcpp::Parameter("rate", rate)}); + } + auto period = std::chrono::duration(1.0 / rate); + timer_ = this->create_wall_timer( + std::chrono::duration_cast(period), + std::bind(&CameraDriver::publish_image, this)); + + // Register parameter callback + param_callback_handle_ = this->add_on_set_parameters_callback( + std::bind(&CameraDriver::on_parameter_change, this, std::placeholders::_1)); + + RCLCPP_INFO( + this->get_logger(), "Camera driver started at %.1f Hz (%dx%d)", + rate, width_, height_); + } + +private: + void load_parameters() + { + width_ = this->get_parameter("width").as_int(); + height_ = this->get_parameter("height").as_int(); + noise_level_ = this->get_parameter("noise_level").as_double(); + failure_probability_ = this->get_parameter("failure_probability").as_double(); + inject_black_frames_ = this->get_parameter("inject_black_frames").as_bool(); + brightness_ = this->get_parameter("brightness").as_int(); + } + + rcl_interfaces::msg::SetParametersResult on_parameter_change( + const std::vector & parameters) + { + rcl_interfaces::msg::SetParametersResult result; + result.successful = true; + + for (const auto & param : parameters) { + if (param.get_name() == "noise_level") { + noise_level_ = param.as_double(); + RCLCPP_INFO(this->get_logger(), "Noise level changed to %.2f", noise_level_); + } else if (param.get_name() == "failure_probability") { + failure_probability_ = param.as_double(); + RCLCPP_INFO( + this->get_logger(), "Failure probability changed to %.2f", + failure_probability_); + } else if (param.get_name() == "inject_black_frames") { + inject_black_frames_ = param.as_bool(); + RCLCPP_INFO( + this->get_logger(), "Black frames %s", + inject_black_frames_ ? "enabled" : "disabled"); + } else if (param.get_name() == "brightness") { + brightness_ = param.as_int(); + RCLCPP_INFO(this->get_logger(), "Brightness changed to %d", brightness_); + } else if (param.get_name() == "rate") { + double rate = param.as_double(); + if (rate <= 0.0) { + RCLCPP_WARN( + this->get_logger(), + "Invalid rate parameter value (%f Hz). Rejecting change.", + rate); + result.successful = false; + result.reason = "rate must be positive"; + return result; + } + auto period = std::chrono::duration(1.0 / rate); + timer_ = this->create_wall_timer( + std::chrono::duration_cast(period), + std::bind(&CameraDriver::publish_image, this)); + RCLCPP_INFO(this->get_logger(), "Rate changed to %.1f Hz", rate); + } + } + + return result; + } + + void publish_image() + { + msg_count_++; + + // Check for complete failure + if (uniform_dist_(rng_) < failure_probability_) { + publish_diagnostics("TIMEOUT", "Camera failure (injected)"); + return; + } + + auto image = sensor_msgs::msg::Image(); + image.header.stamp = this->now(); + image.header.frame_id = "camera_link"; + + image.width = static_cast(width_); + image.height = static_cast(height_); + image.encoding = "rgb8"; + image.is_bigendian = false; + image.step = static_cast(width_ * 3); // 3 bytes per pixel (RGB) + + // Generate image data + size_t data_size = static_cast(width_ * height_ * 3); + image.data.resize(data_size); + + bool is_black_frame = inject_black_frames_ && uniform_dist_(rng_) < 0.1; + + if (is_black_frame) { + // All black frame + std::fill(image.data.begin(), image.data.end(), 0); + publish_diagnostics("BLACK_FRAME", "Black frame detected"); + } else { + // Generate gradient pattern with noise + for (int y = 0; y < height_; y++) { + for (int x = 0; x < width_; x++) { + size_t idx = static_cast((y * width_ + x) * 3); + + // Base gradient pattern + uint8_t r = static_cast(std::clamp(brightness_ + x / 5, 0, 255)); + uint8_t g = static_cast(std::clamp(brightness_ + y / 4, 0, 255)); + uint8_t b = static_cast(std::clamp(brightness_, 0, 255)); + + // Add noise - replace pixel with random values + if (uniform_dist_(rng_) < noise_level_) { + r = static_cast(uniform_dist_(rng_) * 255); + g = static_cast(uniform_dist_(rng_) * 255); + b = static_cast(uniform_dist_(rng_) * 255); + } + + image.data[idx] = r; + image.data[idx + 1] = g; + image.data[idx + 2] = b; + } + } + + if (noise_level_ > 0.1) { + publish_diagnostics("HIGH_NOISE", "High noise level: " + std::to_string(noise_level_)); + } else if (brightness_ < 30) { + publish_diagnostics("LOW_BRIGHTNESS", "Image too dark"); + } else if (brightness_ > 225) { + publish_diagnostics("OVEREXPOSED", "Image overexposed"); + } else { + publish_diagnostics("OK", "Operating normally"); + } + } + + image_pub_->publish(image); + } + + void publish_diagnostics(const std::string & status, const std::string & message) + { + auto diag_array = diagnostic_msgs::msg::DiagnosticArray(); + diag_array.header.stamp = this->now(); + + auto diag = diagnostic_msgs::msg::DiagnosticStatus(); + diag.name = "camera_driver"; + diag.hardware_id = "perception_camera"; + + if (status == "OK") { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::OK; + } else { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::ERROR; + } + + diag.message = message; + + diagnostic_msgs::msg::KeyValue kv; + kv.key = "status"; + kv.value = status; + diag.values.push_back(kv); + + kv.key = "msg_count"; + kv.value = std::to_string(msg_count_); + diag.values.push_back(kv); + + kv.key = "resolution"; + kv.value = std::to_string(width_) + "x" + std::to_string(height_); + diag.values.push_back(kv); + + kv.key = "noise_level"; + kv.value = std::to_string(noise_level_); + diag.values.push_back(kv); + + kv.key = "brightness"; + kv.value = std::to_string(brightness_); + diag.values.push_back(kv); + + diag_array.status.push_back(diag); + diag_pub_->publish(diag_array); + } + + // Publishers + rclcpp::Publisher::SharedPtr image_pub_; + rclcpp::Publisher::SharedPtr diag_pub_; + + // Timer + rclcpp::TimerBase::SharedPtr timer_; + + // Parameter callback + rclcpp::node_interfaces::OnSetParametersCallbackHandle::SharedPtr param_callback_handle_; + + // Random number generation + std::mt19937 rng_; + std::uniform_real_distribution uniform_dist_; + + // Parameters + int width_; + int height_; + double noise_level_; + double failure_probability_; + bool inject_black_frames_; + int brightness_; + + // Statistics + uint64_t msg_count_{0}; +}; + +} // namespace multi_ecu_demo + +int main(int argc, char ** argv) +{ + rclcpp::init(argc, argv); + rclcpp::spin(std::make_shared()); + rclcpp::shutdown(); + return 0; +} diff --git a/demos/multi_ecu_aggregation/src/perception/lidar_driver.cpp b/demos/multi_ecu_aggregation/src/perception/lidar_driver.cpp new file mode 100644 index 0000000..ae5e64b --- /dev/null +++ b/demos/multi_ecu_aggregation/src/perception/lidar_driver.cpp @@ -0,0 +1,305 @@ +// Copyright 2026 selfpatch +// SPDX-License-Identifier: Apache-2.0 + +/// @file lidar_driver.cpp +/// @brief Simulated 2D LiDAR scanner with configurable fault injection +/// +/// Publishes simulated LaserScan messages with a sinusoidal base pattern, +/// Gaussian noise, drift, and NaN injection. Diagnostics are published to +/// /diagnostics for the legacy fault reporting path via ros2_medkit_diagnostic_bridge. + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "diagnostic_msgs/msg/diagnostic_array.hpp" +#include "diagnostic_msgs/msg/diagnostic_status.hpp" +#include "diagnostic_msgs/msg/key_value.hpp" +#include "rclcpp/rclcpp.hpp" +#include "rcl_interfaces/msg/set_parameters_result.hpp" +#include "sensor_msgs/msg/laser_scan.hpp" + +namespace multi_ecu_demo +{ + +class LidarDriver : public rclcpp::Node +{ +public: + LidarDriver() + : Node("lidar_driver"), + rng_(std::random_device{}()), + normal_dist_(0.0, 1.0), + uniform_dist_(0.0, 1.0), + start_time_(this->now()) + { + // Declare parameters with defaults + this->declare_parameter("scan_rate", 10.0); // Hz + this->declare_parameter("range_min", 0.12); // meters + this->declare_parameter("range_max", 3.5); // meters + this->declare_parameter("angle_min", -3.14159265); // radians (-PI) + this->declare_parameter("angle_max", 3.14159265); // radians (PI) + this->declare_parameter("num_readings", 360); // number of laser beams + this->declare_parameter("noise_stddev", 0.01); // meters + this->declare_parameter("failure_probability", 0.0); // 0.0 - 1.0 + this->declare_parameter("inject_nan", false); + this->declare_parameter("drift_rate", 0.0); // meters per second + + load_parameters(); + + // Create publishers + scan_pub_ = this->create_publisher("scan", 10); + diag_pub_ = this->create_publisher( + "/diagnostics", 10); + + // Calculate publish period from rate (with validation) + double rate = this->get_parameter("scan_rate").as_double(); + if (rate <= 0.0) { + RCLCPP_WARN( + this->get_logger(), + "Invalid scan_rate parameter value (%f Hz). Using default 10.0 Hz instead.", + rate); + rate = 10.0; + this->set_parameters({rclcpp::Parameter("scan_rate", rate)}); + } + auto period = std::chrono::duration(1.0 / rate); + + // Create timer + timer_ = this->create_wall_timer( + std::chrono::duration_cast(period), + std::bind(&LidarDriver::publish_scan, this)); + + // Register parameter callback for runtime reconfiguration + param_callback_handle_ = this->add_on_set_parameters_callback( + std::bind(&LidarDriver::on_parameter_change, this, std::placeholders::_1)); + + RCLCPP_INFO(this->get_logger(), "LiDAR driver started at %.1f Hz", rate); + } + +private: + void load_parameters() + { + range_min_ = this->get_parameter("range_min").as_double(); + range_max_ = this->get_parameter("range_max").as_double(); + angle_min_ = this->get_parameter("angle_min").as_double(); + angle_max_ = this->get_parameter("angle_max").as_double(); + num_readings_ = this->get_parameter("num_readings").as_int(); + if (num_readings_ <= 0) { + RCLCPP_WARN( + this->get_logger(), + "Invalid num_readings parameter (%d). Using default 360.", + num_readings_); + num_readings_ = 360; + this->set_parameters({rclcpp::Parameter("num_readings", num_readings_)}); + } + noise_stddev_ = this->get_parameter("noise_stddev").as_double(); + failure_probability_ = this->get_parameter("failure_probability").as_double(); + inject_nan_ = this->get_parameter("inject_nan").as_bool(); + drift_rate_ = this->get_parameter("drift_rate").as_double(); + } + + rcl_interfaces::msg::SetParametersResult on_parameter_change( + const std::vector & parameters) + { + rcl_interfaces::msg::SetParametersResult result; + result.successful = true; + + for (const auto & param : parameters) { + if (param.get_name() == "noise_stddev") { + noise_stddev_ = param.as_double(); + RCLCPP_INFO(this->get_logger(), "Noise stddev changed to %.4f", noise_stddev_); + } else if (param.get_name() == "failure_probability") { + failure_probability_ = param.as_double(); + RCLCPP_INFO( + this->get_logger(), "Failure probability changed to %.2f", + failure_probability_); + } else if (param.get_name() == "inject_nan") { + inject_nan_ = param.as_bool(); + RCLCPP_INFO( + this->get_logger(), "Inject NaN %s", + inject_nan_ ? "enabled" : "disabled"); + } else if (param.get_name() == "drift_rate") { + drift_rate_ = param.as_double(); + start_time_ = this->now(); + RCLCPP_INFO(this->get_logger(), "Drift rate changed to %.4f (timer reset)", drift_rate_); + } else if (param.get_name() == "scan_rate") { + double rate = param.as_double(); + if (rate <= 0.0) { + RCLCPP_WARN( + this->get_logger(), + "Invalid scan_rate parameter value (%f Hz). Rejecting change.", + rate); + result.successful = false; + result.reason = "scan_rate must be positive"; + return result; + } + auto period = std::chrono::duration(1.0 / rate); + timer_ = this->create_wall_timer( + std::chrono::duration_cast(period), + std::bind(&LidarDriver::publish_scan, this)); + RCLCPP_INFO(this->get_logger(), "Scan rate changed to %.1f Hz", rate); + } + } + + return result; + } + + void publish_scan() + { + msg_count_++; + + // Check for complete failure + if (uniform_dist_(rng_) < failure_probability_) { + publish_diagnostics("TIMEOUT", "Sensor failure (injected)"); + return; // Don't publish - simulates timeout + } + + auto msg = sensor_msgs::msg::LaserScan(); + msg.header.stamp = this->now(); + msg.header.frame_id = "lidar_link"; + + msg.angle_min = static_cast(angle_min_); + msg.angle_max = static_cast(angle_max_); + msg.angle_increment = + static_cast((angle_max_ - angle_min_) / static_cast(num_readings_)); + msg.time_increment = 0.0f; + msg.scan_time = static_cast(1.0 / this->get_parameter("scan_rate").as_double()); + msg.range_min = static_cast(range_min_); + msg.range_max = static_cast(range_max_); + + // Calculate drift offset + double elapsed = (this->now() - start_time_).seconds(); + double drift_offset = drift_rate_ * elapsed; + + // Generate simulated ranges with sinusoidal pattern + noise + drift + msg.ranges.resize(static_cast(num_readings_)); + msg.intensities.resize(static_cast(num_readings_)); + + int nan_count = 0; + for (int i = 0; i < num_readings_; i++) { + double angle = angle_min_ + i * msg.angle_increment; + + // Sinusoidal base pattern simulating walls at varying distances + double base_range = 2.0 + 0.5 * std::sin(2.0 * angle) + 0.3 * std::sin(5.0 * angle); + + // Apply drift + base_range += drift_offset; + + // Add Gaussian noise + double noise = normal_dist_(rng_) * noise_stddev_; + double range = base_range + noise; + + // Clamp to valid range + range = std::clamp(range, range_min_, range_max_); + + // Inject NaN if configured + if (inject_nan_) { + range = std::numeric_limits::quiet_NaN(); + nan_count++; + } + + msg.ranges[static_cast(i)] = static_cast(range); + msg.intensities[static_cast(i)] = + static_cast(100.0 * (1.0 - range / range_max_)); + } + + scan_pub_->publish(msg); + + // Publish diagnostics + if (nan_count > 0) { + publish_diagnostics("NAN_VALUES", "NaN values detected: " + std::to_string(nan_count)); + } else if (noise_stddev_ > 0.1) { + publish_diagnostics("HIGH_NOISE", "Noise stddev: " + std::to_string(noise_stddev_)); + } else if (drift_offset > 0.1) { + publish_diagnostics("DRIFTING", "Drift: " + std::to_string(drift_offset) + "m"); + } else { + publish_diagnostics("OK", "Operating normally"); + } + } + + void publish_diagnostics(const std::string & status, const std::string & message) + { + auto diag_array = diagnostic_msgs::msg::DiagnosticArray(); + diag_array.header.stamp = this->now(); + + auto diag = diagnostic_msgs::msg::DiagnosticStatus(); + diag.name = "lidar_driver"; + diag.hardware_id = "perception_lidar"; + + if (status == "OK") { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::OK; + } else { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::ERROR; + } + + diag.message = message; + + diagnostic_msgs::msg::KeyValue kv; + kv.key = "status"; + kv.value = status; + diag.values.push_back(kv); + + kv.key = "msg_count"; + kv.value = std::to_string(msg_count_); + diag.values.push_back(kv); + + kv.key = "noise_stddev"; + kv.value = std::to_string(noise_stddev_); + diag.values.push_back(kv); + + kv.key = "failure_probability"; + kv.value = std::to_string(failure_probability_); + diag.values.push_back(kv); + + kv.key = "drift_rate"; + kv.value = std::to_string(drift_rate_); + diag.values.push_back(kv); + + diag_array.status.push_back(diag); + diag_pub_->publish(diag_array); + } + + // Publishers + rclcpp::Publisher::SharedPtr scan_pub_; + rclcpp::Publisher::SharedPtr diag_pub_; + + // Timer + rclcpp::TimerBase::SharedPtr timer_; + + // Parameter callback handle + rclcpp::node_interfaces::OnSetParametersCallbackHandle::SharedPtr param_callback_handle_; + + // Random number generation + std::mt19937 rng_; + std::normal_distribution normal_dist_; + std::uniform_real_distribution uniform_dist_; + + // Parameters + double range_min_; + double range_max_; + double angle_min_; + double angle_max_; + int num_readings_; + double noise_stddev_; + double failure_probability_; + bool inject_nan_; + double drift_rate_; + + // Statistics + rclcpp::Time start_time_; + uint64_t msg_count_{0}; +}; + +} // namespace multi_ecu_demo + +int main(int argc, char ** argv) +{ + rclcpp::init(argc, argv); + rclcpp::spin(std::make_shared()); + rclcpp::shutdown(); + return 0; +} diff --git a/demos/multi_ecu_aggregation/src/perception/object_detector.cpp b/demos/multi_ecu_aggregation/src/perception/object_detector.cpp new file mode 100644 index 0000000..d8dc217 --- /dev/null +++ b/demos/multi_ecu_aggregation/src/perception/object_detector.cpp @@ -0,0 +1,321 @@ +// Copyright 2026 selfpatch +// SPDX-License-Identifier: Apache-2.0 + +/// @file object_detector.cpp +/// @brief Timer-based object detector that subscribes to filtered point cloud +/// +/// Generates fake 2D detections at a configurable rate. Subscribes to +/// filtered_points (PointCloud2) to track input availability. Supports +/// false positive injection, detection miss rate, and complete failure. +/// Diagnostics are published to /diagnostics. + +#include +#include +#include +#include +#include +#include +#include + +#include "diagnostic_msgs/msg/diagnostic_array.hpp" +#include "diagnostic_msgs/msg/diagnostic_status.hpp" +#include "diagnostic_msgs/msg/key_value.hpp" +#include "rclcpp/rclcpp.hpp" +#include "rcl_interfaces/msg/set_parameters_result.hpp" +#include "sensor_msgs/msg/point_cloud2.hpp" +#include "vision_msgs/msg/detection2_d.hpp" +#include "vision_msgs/msg/detection2_d_array.hpp" +#include "vision_msgs/msg/object_hypothesis_with_pose.hpp" + +namespace multi_ecu_demo +{ + +class ObjectDetector : public rclcpp::Node +{ +public: + ObjectDetector() + : Node("object_detector"), + rng_(std::random_device{}()), + uniform_dist_(0.0, 1.0) + { + // Declare parameters with defaults + this->declare_parameter("false_positive_rate", 0.0); // 0.0 - 1.0 + this->declare_parameter("miss_rate", 0.0); // 0.0 - 1.0 + this->declare_parameter("failure_probability", 0.0); // 0.0 - 1.0 + this->declare_parameter("detection_rate", 5.0); // Hz + + load_parameters(); + + // Subscribe to filtered point cloud (tracks input availability) + cloud_sub_ = this->create_subscription( + "filtered_points", 10, + std::bind(&ObjectDetector::cloud_callback, this, std::placeholders::_1)); + + // Create publishers + detection_pub_ = this->create_publisher( + "detections", 10); + diag_pub_ = this->create_publisher( + "/diagnostics", 10); + + // Create timer for detection rate (with validation) + double rate = detection_rate_; + if (rate <= 0.0) { + RCLCPP_WARN( + this->get_logger(), + "Invalid detection_rate parameter value (%f Hz). Using default 5.0 Hz.", + rate); + rate = 5.0; + detection_rate_ = rate; + this->set_parameters({rclcpp::Parameter("detection_rate", rate)}); + } + auto period = std::chrono::duration(1.0 / rate); + timer_ = this->create_wall_timer( + std::chrono::duration_cast(period), + std::bind(&ObjectDetector::detect, this)); + + // Register parameter callback + param_callback_handle_ = this->add_on_set_parameters_callback( + std::bind(&ObjectDetector::on_parameter_change, this, std::placeholders::_1)); + + RCLCPP_INFO(this->get_logger(), "Object detector started at %.1f Hz", rate); + } + +private: + void load_parameters() + { + false_positive_rate_ = this->get_parameter("false_positive_rate").as_double(); + miss_rate_ = this->get_parameter("miss_rate").as_double(); + failure_probability_ = this->get_parameter("failure_probability").as_double(); + detection_rate_ = this->get_parameter("detection_rate").as_double(); + } + + rcl_interfaces::msg::SetParametersResult on_parameter_change( + const std::vector & parameters) + { + rcl_interfaces::msg::SetParametersResult result; + result.successful = true; + + for (const auto & param : parameters) { + if (param.get_name() == "false_positive_rate") { + false_positive_rate_ = param.as_double(); + RCLCPP_INFO( + this->get_logger(), "False positive rate changed to %.2f", + false_positive_rate_); + } else if (param.get_name() == "miss_rate") { + miss_rate_ = param.as_double(); + RCLCPP_INFO(this->get_logger(), "Miss rate changed to %.2f", miss_rate_); + } else if (param.get_name() == "failure_probability") { + failure_probability_ = param.as_double(); + RCLCPP_INFO( + this->get_logger(), "Failure probability changed to %.2f", + failure_probability_); + } else if (param.get_name() == "detection_rate") { + double rate = param.as_double(); + if (rate <= 0.0) { + RCLCPP_WARN( + this->get_logger(), + "Invalid detection_rate parameter value (%f Hz). Rejecting change.", + rate); + result.successful = false; + result.reason = "detection_rate must be positive"; + return result; + } + detection_rate_ = rate; + auto period = std::chrono::duration(1.0 / rate); + timer_ = this->create_wall_timer( + std::chrono::duration_cast(period), + std::bind(&ObjectDetector::detect, this)); + RCLCPP_INFO(this->get_logger(), "Detection rate changed to %.1f Hz", rate); + } + } + + return result; + } + + void cloud_callback(const sensor_msgs::msg::PointCloud2::SharedPtr msg) + { + last_cloud_time_ = this->now(); + last_cloud_points_ = msg->width * msg->height; + } + + void detect() + { + msg_count_++; + + // Check for complete failure + if (uniform_dist_(rng_) < failure_probability_) { + publish_diagnostics("FAILURE", "Detector failure (injected)"); + return; + } + + // Check if we have recent point cloud input + bool has_input = false; + if (last_cloud_time_.nanoseconds() > 0) { + double elapsed = (this->now() - last_cloud_time_).seconds(); + has_input = elapsed < 2.0; // Consider stale after 2 seconds + } + + // Check for miss rate - suppress all detections + if (uniform_dist_(rng_) < miss_rate_) { + auto det_array = vision_msgs::msg::Detection2DArray(); + det_array.header.stamp = this->now(); + det_array.header.frame_id = "camera_link"; + detection_pub_->publish(det_array); + publish_diagnostics("MISSED", "All detections suppressed (miss rate)"); + return; + } + + auto det_array = vision_msgs::msg::Detection2DArray(); + det_array.header.stamp = this->now(); + det_array.header.frame_id = "camera_link"; + + // Generate 1-3 fake detections + std::uniform_int_distribution count_dist(1, 3); + int num_detections = count_dist(rng_); + + // Object class labels for fake detections + static const std::vector class_labels = { + "person", "car", "bicycle", "obstacle", "cone" + }; + std::uniform_int_distribution class_dist(0, class_labels.size() - 1); + + for (int i = 0; i < num_detections; i++) { + auto det = vision_msgs::msg::Detection2D(); + + // Random bounding box center and size in image coordinates + det.bbox.center.position.x = uniform_dist_(rng_) * 640.0; + det.bbox.center.position.y = uniform_dist_(rng_) * 480.0; + det.bbox.size_x = 50.0 + uniform_dist_(rng_) * 100.0; + det.bbox.size_y = 50.0 + uniform_dist_(rng_) * 150.0; + + // Add hypothesis with confidence + auto hypothesis = vision_msgs::msg::ObjectHypothesisWithPose(); + hypothesis.hypothesis.class_id = class_labels[class_dist(rng_)]; + hypothesis.hypothesis.score = 0.6 + uniform_dist_(rng_) * 0.4; // 0.6 - 1.0 + det.results.push_back(hypothesis); + + det_array.detections.push_back(det); + } + + // Inject false positive with probability + if (uniform_dist_(rng_) < false_positive_rate_) { + auto fp_det = vision_msgs::msg::Detection2D(); + fp_det.bbox.center.position.x = uniform_dist_(rng_) * 640.0; + fp_det.bbox.center.position.y = uniform_dist_(rng_) * 480.0; + fp_det.bbox.size_x = 30.0 + uniform_dist_(rng_) * 60.0; + fp_det.bbox.size_y = 30.0 + uniform_dist_(rng_) * 60.0; + + auto fp_hypothesis = vision_msgs::msg::ObjectHypothesisWithPose(); + fp_hypothesis.hypothesis.class_id = "phantom"; + fp_hypothesis.hypothesis.score = 0.3 + uniform_dist_(rng_) * 0.3; // low confidence + fp_det.results.push_back(fp_hypothesis); + + det_array.detections.push_back(fp_det); + false_positive_count_++; + } + + detection_pub_->publish(det_array); + + // Publish diagnostics + if (!has_input) { + publish_diagnostics( + "NO_INPUT", + "No recent point cloud input - detections may be stale"); + } else if (false_positive_rate_ > 0.3) { + publish_diagnostics( + "HIGH_FP_RATE", + "High false positive rate: " + std::to_string(false_positive_rate_)); + } else { + publish_diagnostics( + "OK", + "Published " + std::to_string(det_array.detections.size()) + " detections"); + } + } + + void publish_diagnostics(const std::string & status, const std::string & message) + { + auto diag_array = diagnostic_msgs::msg::DiagnosticArray(); + diag_array.header.stamp = this->now(); + + auto diag = diagnostic_msgs::msg::DiagnosticStatus(); + diag.name = "object_detector"; + diag.hardware_id = "perception_detector"; + + if (status == "OK") { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::OK; + } else { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::ERROR; + } + + diag.message = message; + + diagnostic_msgs::msg::KeyValue kv; + kv.key = "status"; + kv.value = status; + diag.values.push_back(kv); + + kv.key = "msg_count"; + kv.value = std::to_string(msg_count_); + diag.values.push_back(kv); + + kv.key = "false_positive_count"; + kv.value = std::to_string(false_positive_count_); + diag.values.push_back(kv); + + kv.key = "last_cloud_points"; + kv.value = std::to_string(last_cloud_points_); + diag.values.push_back(kv); + + kv.key = "false_positive_rate"; + kv.value = std::to_string(false_positive_rate_); + diag.values.push_back(kv); + + kv.key = "miss_rate"; + kv.value = std::to_string(miss_rate_); + diag.values.push_back(kv); + + diag_array.status.push_back(diag); + diag_pub_->publish(diag_array); + } + + // Subscription + rclcpp::Subscription::SharedPtr cloud_sub_; + + // Publishers + rclcpp::Publisher::SharedPtr detection_pub_; + rclcpp::Publisher::SharedPtr diag_pub_; + + // Timer + rclcpp::TimerBase::SharedPtr timer_; + + // Parameter callback + rclcpp::node_interfaces::OnSetParametersCallbackHandle::SharedPtr param_callback_handle_; + + // Random number generation + std::mt19937 rng_; + std::uniform_real_distribution uniform_dist_; + + // Parameters + double false_positive_rate_; + double miss_rate_; + double failure_probability_; + double detection_rate_; + + // State tracking + rclcpp::Time last_cloud_time_{0, 0, RCL_SYSTEM_TIME}; + uint32_t last_cloud_points_{0}; + + // Statistics + uint64_t msg_count_{0}; + uint64_t false_positive_count_{0}; +}; + +} // namespace multi_ecu_demo + +int main(int argc, char ** argv) +{ + rclcpp::init(argc, argv); + rclcpp::spin(std::make_shared()); + rclcpp::shutdown(); + return 0; +} diff --git a/demos/multi_ecu_aggregation/src/perception/point_cloud_filter.cpp b/demos/multi_ecu_aggregation/src/perception/point_cloud_filter.cpp new file mode 100644 index 0000000..1ffcaa5 --- /dev/null +++ b/demos/multi_ecu_aggregation/src/perception/point_cloud_filter.cpp @@ -0,0 +1,314 @@ +// Copyright 2026 selfpatch +// SPDX-License-Identifier: Apache-2.0 + +/// @file point_cloud_filter.cpp +/// @brief Subscribes to LaserScan, converts to PointCloud2, and publishes filtered points +/// +/// Converts incoming LaserScan ranges to XYZ points in a PointCloud2 message. +/// Supports point drop rate, artificial processing delay, and complete failure +/// injection. Diagnostics are published to /diagnostics. + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "diagnostic_msgs/msg/diagnostic_array.hpp" +#include "diagnostic_msgs/msg/diagnostic_status.hpp" +#include "diagnostic_msgs/msg/key_value.hpp" +#include "rclcpp/rclcpp.hpp" +#include "rcl_interfaces/msg/set_parameters_result.hpp" +#include "sensor_msgs/msg/laser_scan.hpp" +#include "sensor_msgs/msg/point_cloud2.hpp" +#include "sensor_msgs/msg/point_field.hpp" + +namespace multi_ecu_demo +{ + +class PointCloudFilter : public rclcpp::Node +{ +public: + PointCloudFilter() + : Node("point_cloud_filter"), + rng_(std::random_device{}()), + uniform_dist_(0.0, 1.0) + { + // Declare parameters with defaults + this->declare_parameter("drop_rate", 0.0); // 0.0 - 1.0 probability of dropping each point + this->declare_parameter("delay_ms", 0); // artificial processing delay in milliseconds + this->declare_parameter("failure_probability", 0.0); // 0.0 - 1.0 + + load_parameters(); + + // Create subscription to LaserScan + scan_sub_ = this->create_subscription( + "scan", 10, + std::bind(&PointCloudFilter::scan_callback, this, std::placeholders::_1)); + + // Create publisher for filtered PointCloud2 + cloud_pub_ = this->create_publisher("filtered_points", 10); + diag_pub_ = this->create_publisher( + "/diagnostics", 10); + + // Register parameter callback + param_callback_handle_ = this->add_on_set_parameters_callback( + std::bind(&PointCloudFilter::on_parameter_change, this, std::placeholders::_1)); + + RCLCPP_INFO(this->get_logger(), "Point cloud filter started"); + } + +private: + void load_parameters() + { + drop_rate_ = this->get_parameter("drop_rate").as_double(); + delay_ms_ = this->get_parameter("delay_ms").as_int(); + failure_probability_ = this->get_parameter("failure_probability").as_double(); + } + + rcl_interfaces::msg::SetParametersResult on_parameter_change( + const std::vector & parameters) + { + rcl_interfaces::msg::SetParametersResult result; + result.successful = true; + + for (const auto & param : parameters) { + if (param.get_name() == "drop_rate") { + drop_rate_ = param.as_double(); + RCLCPP_INFO(this->get_logger(), "Drop rate changed to %.2f", drop_rate_); + } else if (param.get_name() == "delay_ms") { + delay_ms_ = param.as_int(); + RCLCPP_INFO(this->get_logger(), "Delay changed to %d ms", delay_ms_); + } else if (param.get_name() == "failure_probability") { + failure_probability_ = param.as_double(); + RCLCPP_INFO( + this->get_logger(), "Failure probability changed to %.2f", + failure_probability_); + } + } + + return result; + } + + void scan_callback(const sensor_msgs::msg::LaserScan::SharedPtr scan) + { + msg_count_++; + + // Intentional blocking sleep to simulate slow processing pipeline. + // This blocks the single-threaded executor during the delay. + if (delay_ms_ > 0) { + std::this_thread::sleep_for(std::chrono::milliseconds(delay_ms_)); + } + + // Check for complete failure - publish empty cloud + if (uniform_dist_(rng_) < failure_probability_) { + publish_empty_cloud(scan->header); + publish_diagnostics("FAILURE", "Filter failure (injected) - empty cloud published"); + return; + } + + // Convert LaserScan to PointCloud2 with filtering + std::vector points; // x, y, z triples + int input_count = static_cast(scan->ranges.size()); + int dropped_count = 0; + + for (size_t i = 0; i < scan->ranges.size(); i++) { + float range = scan->ranges[i]; + + // Skip invalid ranges + if (std::isnan(range) || std::isinf(range) || + range < scan->range_min || range > scan->range_max) + { + dropped_count++; + continue; + } + + // Apply drop rate - probability of dropping each point + if (uniform_dist_(rng_) < drop_rate_) { + dropped_count++; + continue; + } + + // Convert polar to Cartesian (x, y, z=0 for 2D scan) + float angle = scan->angle_min + static_cast(i) * scan->angle_increment; + float x = range * std::cos(angle); + float y = range * std::sin(angle); + float z = 0.0f; + + points.push_back(x); + points.push_back(y); + points.push_back(z); + } + + // Build PointCloud2 message + auto cloud = sensor_msgs::msg::PointCloud2(); + cloud.header = scan->header; + cloud.header.frame_id = "lidar_link"; + + uint32_t num_points = static_cast(points.size() / 3); + cloud.height = 1; + cloud.width = num_points; + cloud.is_dense = true; + cloud.is_bigendian = false; + + // Define point fields: x, y, z (float32 each) + sensor_msgs::msg::PointField field_x; + field_x.name = "x"; + field_x.offset = 0; + field_x.datatype = sensor_msgs::msg::PointField::FLOAT32; + field_x.count = 1; + + sensor_msgs::msg::PointField field_y; + field_y.name = "y"; + field_y.offset = 4; + field_y.datatype = sensor_msgs::msg::PointField::FLOAT32; + field_y.count = 1; + + sensor_msgs::msg::PointField field_z; + field_z.name = "z"; + field_z.offset = 8; + field_z.datatype = sensor_msgs::msg::PointField::FLOAT32; + field_z.count = 1; + + cloud.fields = {field_x, field_y, field_z}; + cloud.point_step = 12; // 3 floats * 4 bytes + cloud.row_step = cloud.point_step * num_points; + + // Copy point data + cloud.data.resize(points.size() * sizeof(float)); + std::memcpy(cloud.data.data(), points.data(), cloud.data.size()); + + cloud_pub_->publish(cloud); + + // Publish diagnostics + int output_count = static_cast(num_points); + if (drop_rate_ > 0.5) { + publish_diagnostics( + "HIGH_DROP_RATE", + "Dropping " + std::to_string(dropped_count) + "/" + std::to_string(input_count) + + " points"); + } else if (delay_ms_ > 100) { + publish_diagnostics( + "HIGH_LATENCY", + "Processing delay: " + std::to_string(delay_ms_) + "ms"); + } else { + publish_diagnostics( + "OK", + "Filtered " + std::to_string(input_count) + " -> " + std::to_string(output_count) + + " points"); + } + } + + void publish_empty_cloud(const std_msgs::msg::Header & header) + { + auto cloud = sensor_msgs::msg::PointCloud2(); + cloud.header = header; + cloud.header.frame_id = "lidar_link"; + cloud.height = 1; + cloud.width = 0; + cloud.is_dense = true; + cloud.is_bigendian = false; + + sensor_msgs::msg::PointField field_x; + field_x.name = "x"; + field_x.offset = 0; + field_x.datatype = sensor_msgs::msg::PointField::FLOAT32; + field_x.count = 1; + + sensor_msgs::msg::PointField field_y; + field_y.name = "y"; + field_y.offset = 4; + field_y.datatype = sensor_msgs::msg::PointField::FLOAT32; + field_y.count = 1; + + sensor_msgs::msg::PointField field_z; + field_z.name = "z"; + field_z.offset = 8; + field_z.datatype = sensor_msgs::msg::PointField::FLOAT32; + field_z.count = 1; + + cloud.fields = {field_x, field_y, field_z}; + cloud.point_step = 12; + cloud.row_step = 0; + + cloud_pub_->publish(cloud); + } + + void publish_diagnostics(const std::string & status, const std::string & message) + { + auto diag_array = diagnostic_msgs::msg::DiagnosticArray(); + diag_array.header.stamp = this->now(); + + auto diag = diagnostic_msgs::msg::DiagnosticStatus(); + diag.name = "point_cloud_filter"; + diag.hardware_id = "perception_filter"; + + if (status == "OK") { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::OK; + } else { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::ERROR; + } + + diag.message = message; + + diagnostic_msgs::msg::KeyValue kv; + kv.key = "status"; + kv.value = status; + diag.values.push_back(kv); + + kv.key = "msg_count"; + kv.value = std::to_string(msg_count_); + diag.values.push_back(kv); + + kv.key = "drop_rate"; + kv.value = std::to_string(drop_rate_); + diag.values.push_back(kv); + + kv.key = "delay_ms"; + kv.value = std::to_string(delay_ms_); + diag.values.push_back(kv); + + kv.key = "failure_probability"; + kv.value = std::to_string(failure_probability_); + diag.values.push_back(kv); + + diag_array.status.push_back(diag); + diag_pub_->publish(diag_array); + } + + // Subscription + rclcpp::Subscription::SharedPtr scan_sub_; + + // Publishers + rclcpp::Publisher::SharedPtr cloud_pub_; + rclcpp::Publisher::SharedPtr diag_pub_; + + // Parameter callback + rclcpp::node_interfaces::OnSetParametersCallbackHandle::SharedPtr param_callback_handle_; + + // Random number generation + std::mt19937 rng_; + std::uniform_real_distribution uniform_dist_; + + // Parameters + double drop_rate_; + int delay_ms_; + double failure_probability_; + + // Statistics + uint64_t msg_count_{0}; +}; + +} // namespace multi_ecu_demo + +int main(int argc, char ** argv) +{ + rclcpp::init(argc, argv); + rclcpp::spin(std::make_shared()); + rclcpp::shutdown(); + return 0; +} diff --git a/demos/multi_ecu_aggregation/src/planning/behavior_planner.cpp b/demos/multi_ecu_aggregation/src/planning/behavior_planner.cpp new file mode 100644 index 0000000..7ec1508 --- /dev/null +++ b/demos/multi_ecu_aggregation/src/planning/behavior_planner.cpp @@ -0,0 +1,262 @@ +// Copyright 2026 selfpatch +// SPDX-License-Identifier: Apache-2.0 + +/// @file behavior_planner.cpp +/// @brief Behavior planner node for the Planning ECU +/// +/// Subscribes to a path (nav_msgs/msg/Path) and publishes velocity commands +/// (geometry_msgs/msg/Twist) using simple proportional control toward the next +/// waypoint. Supports direction inversion and failure probability for fault +/// injection. + +#include +#include +#include +#include +#include +#include + +#include "diagnostic_msgs/msg/diagnostic_array.hpp" +#include "diagnostic_msgs/msg/diagnostic_status.hpp" +#include "diagnostic_msgs/msg/key_value.hpp" +#include "geometry_msgs/msg/twist.hpp" +#include "nav_msgs/msg/path.hpp" +#include "rclcpp/rclcpp.hpp" +#include "rcl_interfaces/msg/set_parameters_result.hpp" + +namespace multi_ecu_demo +{ + +class BehaviorPlanner : public rclcpp::Node +{ +public: + BehaviorPlanner() + : Node("behavior_planner"), + rng_(std::random_device{}()), + uniform_dist_(0.0, 1.0) + { + // Declare parameters + this->declare_parameter("inject_wrong_direction", false); + this->declare_parameter("failure_probability", 0.0); + this->declare_parameter("command_rate", 10.0); // Hz + + load_parameters(); + + // Create publishers + cmd_pub_ = this->create_publisher("commands", 10); + diag_pub_ = this->create_publisher( + "/diagnostics", 10); + + // Create subscription to path + path_sub_ = this->create_subscription( + "path", 10, + std::bind(&BehaviorPlanner::on_path, this, std::placeholders::_1)); + + // Create timer (with rate validation) + double rate = command_rate_; + if (rate <= 0.0) { + RCLCPP_WARN( + this->get_logger(), + "Parameter 'command_rate' must be > 0.0 Hz, but got %.3f. Falling back to 10.0 Hz.", + rate); + rate = 10.0; + this->set_parameters({rclcpp::Parameter("command_rate", rate)}); + } + auto period = std::chrono::duration(1.0 / rate); + timer_ = this->create_wall_timer( + std::chrono::duration_cast(period), + std::bind(&BehaviorPlanner::compute_command, this)); + + // Register parameter callback + param_callback_handle_ = this->add_on_set_parameters_callback( + std::bind(&BehaviorPlanner::on_parameter_change, this, std::placeholders::_1)); + + RCLCPP_INFO(this->get_logger(), "Behavior planner started at %.1f Hz", rate); + } + +private: + void load_parameters() + { + inject_wrong_direction_ = this->get_parameter("inject_wrong_direction").as_bool(); + failure_probability_ = this->get_parameter("failure_probability").as_double(); + command_rate_ = this->get_parameter("command_rate").as_double(); + } + + rcl_interfaces::msg::SetParametersResult on_parameter_change( + const std::vector & parameters) + { + rcl_interfaces::msg::SetParametersResult result; + result.successful = true; + + for (const auto & param : parameters) { + if (param.get_name() == "inject_wrong_direction") { + inject_wrong_direction_ = param.as_bool(); + RCLCPP_INFO( + this->get_logger(), "Wrong direction %s", + inject_wrong_direction_ ? "enabled" : "disabled"); + } else if (param.get_name() == "failure_probability") { + failure_probability_ = param.as_double(); + RCLCPP_INFO( + this->get_logger(), "Failure probability changed to %.2f", failure_probability_); + } else if (param.get_name() == "command_rate") { + double rate = param.as_double(); + if (rate <= 0.0) { + result.successful = false; + result.reason = "command_rate must be positive"; + return result; + } + command_rate_ = rate; + auto period = std::chrono::duration(1.0 / rate); + timer_ = this->create_wall_timer( + std::chrono::duration_cast(period), + std::bind(&BehaviorPlanner::compute_command, this)); + RCLCPP_INFO(this->get_logger(), "Command rate changed to %.1f Hz", command_rate_); + } + } + + return result; + } + + void on_path(const nav_msgs::msg::Path::SharedPtr msg) + { + current_path_ = *msg; + // Reset waypoint index when a new path arrives + waypoint_index_ = 0; + } + + void compute_command() + { + cmd_count_++; + + // Check for failure injection + if (uniform_dist_(rng_) < failure_probability_) { + publish_diagnostics("COMMAND_FAILURE", "Command generation failed (injected)"); + return; + } + + // No path received yet + if (current_path_.poses.empty()) { + publish_diagnostics("NO_PATH", "Waiting for path input"); + return; + } + + // Get current target waypoint + const auto & target = current_path_.poses[waypoint_index_].pose; + + // Simple proportional control toward the waypoint + // Assume current position is origin (0,0) for simplicity in this simulation + double dx = target.position.x; + double dy = target.position.y; + double distance = std::sqrt(dx * dx + dy * dy); + double heading = std::atan2(dy, dx); + + auto cmd = geometry_msgs::msg::Twist(); + + // Proportional gains + constexpr double kp_linear = 0.5; + constexpr double kp_angular = 1.0; + + // Linear velocity toward waypoint + cmd.linear.x = kp_linear * distance; + + // Angular velocity for heading correction + cmd.angular.z = kp_angular * heading; + + // Inject wrong direction: negate linear.x + if (inject_wrong_direction_) { + cmd.linear.x = -cmd.linear.x; + } + + cmd_pub_->publish(cmd); + + // Advance to next waypoint (loop path) + waypoint_index_ = (waypoint_index_ + 1) % static_cast(current_path_.poses.size()); + + // Diagnostics + if (inject_wrong_direction_) { + publish_diagnostics( + "WRONG_DIRECTION", "Driving in reverse direction (injected)"); + } else if (distance > 10.0) { + publish_diagnostics( + "FAR_FROM_WAYPOINT", + "Distance to waypoint: " + std::to_string(distance) + " m"); + } else { + publish_diagnostics("OK", "Operating normally"); + } + } + + void publish_diagnostics(const std::string & status, const std::string & message) + { + auto diag_array = diagnostic_msgs::msg::DiagnosticArray(); + diag_array.header.stamp = this->now(); + + auto diag = diagnostic_msgs::msg::DiagnosticStatus(); + diag.name = "behavior_planner"; + diag.hardware_id = "planning_ecu"; + + if (status == "OK") { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::OK; + } else { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::ERROR; + } + + diag.message = message; + + diagnostic_msgs::msg::KeyValue kv; + kv.key = "status"; + kv.value = status; + diag.values.push_back(kv); + + kv.key = "cmd_count"; + kv.value = std::to_string(cmd_count_); + diag.values.push_back(kv); + + kv.key = "waypoint_index"; + kv.value = std::to_string(waypoint_index_); + diag.values.push_back(kv); + + kv.key = "path_length"; + kv.value = std::to_string(current_path_.poses.size()); + diag.values.push_back(kv); + + diag_array.status.push_back(diag); + diag_pub_->publish(diag_array); + } + + // Publishers + rclcpp::Publisher::SharedPtr cmd_pub_; + rclcpp::Publisher::SharedPtr diag_pub_; + + // Subscription + rclcpp::Subscription::SharedPtr path_sub_; + + // Timer + rclcpp::TimerBase::SharedPtr timer_; + + // Parameter callback + rclcpp::node_interfaces::OnSetParametersCallbackHandle::SharedPtr param_callback_handle_; + + // Random number generation + std::mt19937 rng_; + std::uniform_real_distribution uniform_dist_; + + // Parameters + bool inject_wrong_direction_; + double failure_probability_; + double command_rate_; + + // State + nav_msgs::msg::Path current_path_; + int waypoint_index_{0}; + uint64_t cmd_count_{0}; +}; + +} // namespace multi_ecu_demo + +int main(int argc, char ** argv) +{ + rclcpp::init(argc, argv); + rclcpp::spin(std::make_shared()); + rclcpp::shutdown(); + return 0; +} diff --git a/demos/multi_ecu_aggregation/src/planning/path_planner.cpp b/demos/multi_ecu_aggregation/src/planning/path_planner.cpp new file mode 100644 index 0000000..d07c724 --- /dev/null +++ b/demos/multi_ecu_aggregation/src/planning/path_planner.cpp @@ -0,0 +1,261 @@ +// Copyright 2026 selfpatch +// SPDX-License-Identifier: Apache-2.0 + +/// @file path_planner.cpp +/// @brief Path planner node for the Planning ECU +/// +/// Subscribes to /perception/detections (Detection2DArray) and publishes a +/// 10-waypoint path in the "map" frame. Offsets the path when detections are +/// present to simulate obstacle avoidance. Supports artificial planning delay +/// and failure probability for fault injection. + +#include +#include +#include +#include +#include +#include +#include + +#include "diagnostic_msgs/msg/diagnostic_array.hpp" +#include "diagnostic_msgs/msg/diagnostic_status.hpp" +#include "diagnostic_msgs/msg/key_value.hpp" +#include "nav_msgs/msg/path.hpp" +#include "rclcpp/rclcpp.hpp" +#include "rcl_interfaces/msg/set_parameters_result.hpp" +#include "vision_msgs/msg/detection2_d_array.hpp" + +namespace multi_ecu_demo +{ + +class PathPlanner : public rclcpp::Node +{ +public: + PathPlanner() + : Node("path_planner"), + rng_(std::random_device{}()), + uniform_dist_(0.0, 1.0) + { + // Declare parameters + this->declare_parameter("planning_delay_ms", 0); + this->declare_parameter("failure_probability", 0.0); + this->declare_parameter("planning_rate", 5.0); // Hz + + load_parameters(); + + // Create publishers + path_pub_ = this->create_publisher("path", 10); + diag_pub_ = this->create_publisher( + "/diagnostics", 10); + + // Create subscription to perception detections (absolute topic) + detection_sub_ = this->create_subscription( + "/perception/detections", 10, + std::bind(&PathPlanner::on_detections, this, std::placeholders::_1)); + + // Create timer (with rate validation) + double rate = planning_rate_; + if (rate <= 0.0) { + RCLCPP_WARN( + this->get_logger(), + "Parameter 'planning_rate' must be > 0.0 Hz, but got %.3f. Falling back to 5.0 Hz.", + rate); + rate = 5.0; + this->set_parameters({rclcpp::Parameter("planning_rate", rate)}); + } + auto period = std::chrono::duration(1.0 / rate); + timer_ = this->create_wall_timer( + std::chrono::duration_cast(period), + std::bind(&PathPlanner::plan_path, this)); + + // Register parameter callback + param_callback_handle_ = this->add_on_set_parameters_callback( + std::bind(&PathPlanner::on_parameter_change, this, std::placeholders::_1)); + + RCLCPP_INFO(this->get_logger(), "Path planner started at %.1f Hz", rate); + } + +private: + void load_parameters() + { + planning_delay_ms_ = this->get_parameter("planning_delay_ms").as_int(); + failure_probability_ = this->get_parameter("failure_probability").as_double(); + planning_rate_ = this->get_parameter("planning_rate").as_double(); + } + + rcl_interfaces::msg::SetParametersResult on_parameter_change( + const std::vector & parameters) + { + rcl_interfaces::msg::SetParametersResult result; + result.successful = true; + + for (const auto & param : parameters) { + if (param.get_name() == "planning_delay_ms") { + planning_delay_ms_ = param.as_int(); + RCLCPP_INFO(this->get_logger(), "Planning delay changed to %ld ms", planning_delay_ms_); + } else if (param.get_name() == "failure_probability") { + failure_probability_ = param.as_double(); + RCLCPP_INFO( + this->get_logger(), "Failure probability changed to %.2f", failure_probability_); + } else if (param.get_name() == "planning_rate") { + double rate = param.as_double(); + if (rate <= 0.0) { + result.successful = false; + result.reason = "planning_rate must be positive"; + return result; + } + planning_rate_ = rate; + auto period = std::chrono::duration(1.0 / rate); + timer_ = this->create_wall_timer( + std::chrono::duration_cast(period), + std::bind(&PathPlanner::plan_path, this)); + RCLCPP_INFO(this->get_logger(), "Planning rate changed to %.1f Hz", planning_rate_); + } + } + + return result; + } + + void on_detections(const vision_msgs::msg::Detection2DArray::SharedPtr msg) + { + last_detection_count_ = static_cast(msg->detections.size()); + } + + void plan_path() + { + plan_count_++; + + // Intentional blocking sleep to simulate slow computation pipeline. + // This blocks the single-threaded executor, preventing parameter changes + // and other callbacks from being processed during the delay. + if (planning_delay_ms_ > 0) { + std::this_thread::sleep_for(std::chrono::milliseconds(planning_delay_ms_)); + } + + // Check for failure injection + if (uniform_dist_(rng_) < failure_probability_) { + publish_diagnostics("PLANNING_FAILURE", "Path planning failed (injected)"); + return; + } + + auto path = nav_msgs::msg::Path(); + path.header.stamp = this->now(); + path.header.frame_id = "map"; + + // Generate 10-waypoint path + constexpr int num_waypoints = 10; + double lateral_offset = 0.0; + + // Offset path if detections exist (obstacle avoidance simulation) + if (last_detection_count_ > 0) { + lateral_offset = 1.0 * last_detection_count_; + } + + for (int i = 0; i < num_waypoints; i++) { + geometry_msgs::msg::PoseStamped pose; + pose.header = path.header; + + // Straight-line path along x-axis with optional lateral offset on y-axis + pose.pose.position.x = static_cast(i) * 2.0; + pose.pose.position.y = lateral_offset; + pose.pose.position.z = 0.0; + + // Orientation: facing forward (identity quaternion) + pose.pose.orientation.w = 1.0; + pose.pose.orientation.x = 0.0; + pose.pose.orientation.y = 0.0; + pose.pose.orientation.z = 0.0; + + path.poses.push_back(pose); + } + + path_pub_->publish(path); + + // Diagnostics + if (planning_delay_ms_ > 100) { + publish_diagnostics( + "SLOW_PLANNING", + "Planning delay: " + std::to_string(planning_delay_ms_) + " ms"); + } else if (last_detection_count_ > 5) { + publish_diagnostics( + "HIGH_OBSTACLE_COUNT", + "Avoiding " + std::to_string(last_detection_count_) + " detections"); + } else { + publish_diagnostics("OK", "Operating normally"); + } + } + + void publish_diagnostics(const std::string & status, const std::string & message) + { + auto diag_array = diagnostic_msgs::msg::DiagnosticArray(); + diag_array.header.stamp = this->now(); + + auto diag = diagnostic_msgs::msg::DiagnosticStatus(); + diag.name = "path_planner"; + diag.hardware_id = "planning_ecu"; + + if (status == "OK") { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::OK; + } else { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::ERROR; + } + + diag.message = message; + + diagnostic_msgs::msg::KeyValue kv; + kv.key = "status"; + kv.value = status; + diag.values.push_back(kv); + + kv.key = "plan_count"; + kv.value = std::to_string(plan_count_); + diag.values.push_back(kv); + + kv.key = "detection_count"; + kv.value = std::to_string(last_detection_count_); + diag.values.push_back(kv); + + kv.key = "planning_delay_ms"; + kv.value = std::to_string(planning_delay_ms_); + diag.values.push_back(kv); + + diag_array.status.push_back(diag); + diag_pub_->publish(diag_array); + } + + // Publishers + rclcpp::Publisher::SharedPtr path_pub_; + rclcpp::Publisher::SharedPtr diag_pub_; + + // Subscription + rclcpp::Subscription::SharedPtr detection_sub_; + + // Timer + rclcpp::TimerBase::SharedPtr timer_; + + // Parameter callback + rclcpp::node_interfaces::OnSetParametersCallbackHandle::SharedPtr param_callback_handle_; + + // Random number generation + std::mt19937 rng_; + std::uniform_real_distribution uniform_dist_; + + // Parameters + int64_t planning_delay_ms_; + double failure_probability_; + double planning_rate_; + + // State + int last_detection_count_{0}; + uint64_t plan_count_{0}; +}; + +} // namespace multi_ecu_demo + +int main(int argc, char ** argv) +{ + rclcpp::init(argc, argv); + rclcpp::spin(std::make_shared()); + rclcpp::shutdown(); + return 0; +} diff --git a/demos/multi_ecu_aggregation/src/planning/task_scheduler.cpp b/demos/multi_ecu_aggregation/src/planning/task_scheduler.cpp new file mode 100644 index 0000000..8356f56 --- /dev/null +++ b/demos/multi_ecu_aggregation/src/planning/task_scheduler.cpp @@ -0,0 +1,218 @@ +// Copyright 2026 selfpatch +// SPDX-License-Identifier: Apache-2.0 + +/// @file task_scheduler.cpp +/// @brief Task scheduler node for the Planning ECU +/// +/// Standalone timer node that cycles through task states +/// ("idle" -> "navigating" -> "executing" -> "returning" -> "idle"). +/// Publishes the current state as a String message. Supports stuck-state +/// injection and failure probability for fault injection. + +#include +#include +#include +#include +#include + +#include "diagnostic_msgs/msg/diagnostic_array.hpp" +#include "diagnostic_msgs/msg/diagnostic_status.hpp" +#include "diagnostic_msgs/msg/key_value.hpp" +#include "rclcpp/rclcpp.hpp" +#include "rcl_interfaces/msg/set_parameters_result.hpp" +#include "std_msgs/msg/string.hpp" + +namespace multi_ecu_demo +{ + +class TaskScheduler : public rclcpp::Node +{ +public: + TaskScheduler() + : Node("task_scheduler"), + rng_(std::random_device{}()), + uniform_dist_(0.0, 1.0) + { + // Declare parameters + this->declare_parameter("inject_stuck", false); + this->declare_parameter("failure_probability", 0.0); + this->declare_parameter("schedule_rate", 1.0); // Hz + + load_parameters(); + + // Create publishers + status_pub_ = this->create_publisher("task_status", 10); + diag_pub_ = this->create_publisher( + "/diagnostics", 10); + + // Create timer (with rate validation) + double rate = schedule_rate_; + if (rate <= 0.0) { + RCLCPP_WARN( + this->get_logger(), + "Parameter 'schedule_rate' must be > 0.0 Hz, but got %.3f. Falling back to 1.0 Hz.", + rate); + rate = 1.0; + this->set_parameters({rclcpp::Parameter("schedule_rate", rate)}); + } + auto period = std::chrono::duration(1.0 / rate); + timer_ = this->create_wall_timer( + std::chrono::duration_cast(period), + std::bind(&TaskScheduler::publish_task_status, this)); + + // Register parameter callback + param_callback_handle_ = this->add_on_set_parameters_callback( + std::bind(&TaskScheduler::on_parameter_change, this, std::placeholders::_1)); + + RCLCPP_INFO(this->get_logger(), "Task scheduler started at %.1f Hz", rate); + } + +private: + void load_parameters() + { + inject_stuck_ = this->get_parameter("inject_stuck").as_bool(); + failure_probability_ = this->get_parameter("failure_probability").as_double(); + schedule_rate_ = this->get_parameter("schedule_rate").as_double(); + } + + rcl_interfaces::msg::SetParametersResult on_parameter_change( + const std::vector & parameters) + { + rcl_interfaces::msg::SetParametersResult result; + result.successful = true; + + for (const auto & param : parameters) { + if (param.get_name() == "inject_stuck") { + inject_stuck_ = param.as_bool(); + RCLCPP_INFO( + this->get_logger(), "Stuck injection %s", + inject_stuck_ ? "enabled" : "disabled"); + } else if (param.get_name() == "failure_probability") { + failure_probability_ = param.as_double(); + RCLCPP_INFO( + this->get_logger(), "Failure probability changed to %.2f", failure_probability_); + } else if (param.get_name() == "schedule_rate") { + double rate = param.as_double(); + if (rate <= 0.0) { + result.successful = false; + result.reason = "schedule_rate must be positive"; + return result; + } + schedule_rate_ = rate; + auto period = std::chrono::duration(1.0 / rate); + timer_ = this->create_wall_timer( + std::chrono::duration_cast(period), + std::bind(&TaskScheduler::publish_task_status, this)); + RCLCPP_INFO(this->get_logger(), "Schedule rate changed to %.1f Hz", schedule_rate_); + } + } + + return result; + } + + void publish_task_status() + { + tick_count_++; + + // Check for failure injection + if (uniform_dist_(rng_) < failure_probability_) { + publish_diagnostics("SCHEDULER_FAILURE", "Task scheduling failed (injected)"); + return; + } + + // Get current state + const std::string & current_state = task_states_[state_index_]; + + // Publish current state + auto msg = std_msgs::msg::String(); + msg.data = current_state; + status_pub_->publish(msg); + + // Advance to next state (unless stuck) + if (!inject_stuck_) { + state_index_ = (state_index_ + 1) % static_cast(task_states_.size()); + } + + // Diagnostics + if (inject_stuck_) { + publish_diagnostics( + "STUCK", "Task stuck in state: " + current_state); + } else { + publish_diagnostics("OK", "Current state: " + current_state); + } + } + + void publish_diagnostics(const std::string & status, const std::string & message) + { + auto diag_array = diagnostic_msgs::msg::DiagnosticArray(); + diag_array.header.stamp = this->now(); + + auto diag = diagnostic_msgs::msg::DiagnosticStatus(); + diag.name = "task_scheduler"; + diag.hardware_id = "planning_ecu"; + + if (status == "OK") { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::OK; + } else { + diag.level = diagnostic_msgs::msg::DiagnosticStatus::ERROR; + } + + diag.message = message; + + diagnostic_msgs::msg::KeyValue kv; + kv.key = "status"; + kv.value = status; + diag.values.push_back(kv); + + kv.key = "tick_count"; + kv.value = std::to_string(tick_count_); + diag.values.push_back(kv); + + kv.key = "current_state"; + kv.value = task_states_[state_index_]; + diag.values.push_back(kv); + + kv.key = "state_index"; + kv.value = std::to_string(state_index_); + diag.values.push_back(kv); + + diag_array.status.push_back(diag); + diag_pub_->publish(diag_array); + } + + // Publishers + rclcpp::Publisher::SharedPtr status_pub_; + rclcpp::Publisher::SharedPtr diag_pub_; + + // Timer + rclcpp::TimerBase::SharedPtr timer_; + + // Parameter callback + rclcpp::node_interfaces::OnSetParametersCallbackHandle::SharedPtr param_callback_handle_; + + // Random number generation + std::mt19937 rng_; + std::uniform_real_distribution uniform_dist_; + + // Parameters + bool inject_stuck_; + double failure_probability_; + double schedule_rate_; + + // State machine + const std::vector task_states_ = { + "idle", "navigating", "executing", "returning" + }; + int state_index_{0}; + uint64_t tick_count_{0}; +}; + +} // namespace multi_ecu_demo + +int main(int argc, char ** argv) +{ + rclcpp::init(argc, argv); + rclcpp::spin(std::make_shared()); + rclcpp::shutdown(); + return 0; +} diff --git a/demos/multi_ecu_aggregation/stop-demo.sh b/demos/multi_ecu_aggregation/stop-demo.sh new file mode 100755 index 0000000..e2e2a8c --- /dev/null +++ b/demos/multi_ecu_aggregation/stop-demo.sh @@ -0,0 +1,70 @@ +#!/bin/bash +# Stop Multi-ECU Aggregation Demo + +set -eu + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +echo "Stopping Multi-ECU Aggregation Demo" +echo "=====================================" + +# Check for Docker +if ! command -v docker &> /dev/null; then + echo "Error: Docker is not installed" + exit 1 +fi + +# Parse arguments +REMOVE_VOLUMES="" +REMOVE_IMAGES="" + +usage() { + echo "Usage: $0 [OPTIONS]" + echo "" + echo "Options:" + echo " -v, --volumes Remove named volumes" + echo " --images Remove images" + echo " -h, --help Show this help message" + echo "" + echo "Examples:" + echo " $0 # Stop containers" + echo " $0 --volumes # Stop and remove volumes" + echo " $0 --images # Stop and remove images" +} + +while [[ $# -gt 0 ]]; do + case "$1" in + -v|--volumes) + echo "Will remove named volumes" + REMOVE_VOLUMES="-v" + ;; + --images) + echo "Will remove images" + REMOVE_IMAGES="--rmi all" + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown option: $1" + usage + exit 1 + ;; + esac + shift +done + +# Stop containers +echo "Stopping containers..." +if docker compose version &> /dev/null; then + # shellcheck disable=SC2086 + docker compose down ${REMOVE_VOLUMES} ${REMOVE_IMAGES} +else + # shellcheck disable=SC2086 + docker-compose down ${REMOVE_VOLUMES} ${REMOVE_IMAGES} +fi + +echo "" +echo "Demo stopped successfully!" diff --git a/tests/smoke_test_multi_ecu.sh b/tests/smoke_test_multi_ecu.sh new file mode 100755 index 0000000..3a5e2a2 --- /dev/null +++ b/tests/smoke_test_multi_ecu.sh @@ -0,0 +1,144 @@ +#!/bin/bash +# Smoke tests for multi_ecu_aggregation demo +# Runs from the host against the perception ECU gateway on localhost:8080 +# The perception ECU is the primary aggregator - it should expose local entities +# plus aggregated entities from planning and actuation ECUs. +# +# Usage: ./tests/smoke_test_multi_ecu.sh [GATEWAY_URL] +# Default GATEWAY_URL: http://localhost:8080 + +GATEWAY_URL="${1:-http://localhost:8080}" +# shellcheck disable=SC2034 # Used by smoke_lib.sh +API_BASE="${GATEWAY_URL}/api/v1" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=tests/smoke_lib.sh +source "${SCRIPT_DIR}/smoke_lib.sh" + +trap print_summary EXIT + +# --- Wait for gateway startup --- + +# Multi-ECU demo needs extra time: 3 containers building their ROS graphs +# plus aggregation discovery across the Docker network +wait_for_gateway 120 + +# Wait for runtime node linking (perception ECU local nodes) +wait_for_runtime_linking "/apps/lidar-driver/data" 90 + +# Wait for aggregation to discover peer ECUs +echo " Waiting for aggregated entities from planning ECU (max 60s)..." +if poll_until "/apps" '.items[] | select(.id == "path-planner")' 60; then + echo -e " ${GREEN}Aggregation discovery complete${NC}" +else + echo -e " ${RED}Aggregation discovery did not complete within 60s${NC}" + exit 1 +fi + +# --- Tests --- + +section "Health" + +if api_get "/health"; then + pass "GET /health returns 200" +else + fail "GET /health returns 200" "unexpected status code" +fi + +section "Entity Discovery - Components" + +# robot-alpha is the top-level parent component shared across all 3 ECUs +if api_get "/components"; then + if echo "$RESPONSE" | items_contain_id "robot-alpha"; then + pass "components contains 'robot-alpha'" + else + fail "components contains 'robot-alpha'" "not found in response" + fi +else + fail "GET /components returns 200" "unexpected status code" +fi + +# ECU components are sub-components of robot-alpha (parent_component_id set) +# They appear under /components/robot-alpha/subcomponents, not /components +if api_get "/components/robot-alpha/subcomponents"; then + for comp_id in perception-ecu planning-ecu actuation-ecu; do + if echo "$RESPONSE" | items_contain_id "$comp_id"; then + pass "subcomponents contains '${comp_id}'" + else + fail "subcomponents contains '${comp_id}'" "not found in response" + fi + done +else + fail "GET /components/robot-alpha/subcomponents returns 200" "unexpected status code" +fi + +section "Entity Discovery - Apps" + +# 10 demo apps across 3 ECUs: +# Perception: lidar-driver, camera-driver, point-cloud-filter, object-detector +# Planning: path-planner, behavior-planner, task-scheduler +# Actuation: motor-controller, joint-driver, gripper-controller +if api_get "/apps"; then + for app_id in \ + lidar-driver camera-driver point-cloud-filter object-detector \ + path-planner behavior-planner task-scheduler \ + motor-controller joint-driver gripper-controller; do + if echo "$RESPONSE" | items_contain_id "$app_id"; then + pass "apps contains '${app_id}'" + else + fail "apps contains '${app_id}'" "not found in response" + fi + done + # Verify at least 10 demo apps are present + local_count=$(echo "$RESPONSE" | jq '[.items[] | select(.id | test("^(lidar|camera|point|object|path|behavior|task|motor|joint|gripper)"))] | length') + if [ "$local_count" -ge 10 ]; then + pass "at least 10 demo apps discovered" + else + fail "at least 10 demo apps discovered" "found ${local_count}" + fi +else + fail "GET /apps returns 200" "unexpected status code" +fi + +section "Entity Discovery - Functions" + +# 3 cross-ECU functions: autonomous-navigation, obstacle-avoidance, task-execution +if api_get "/functions"; then + for func_id in autonomous-navigation obstacle-avoidance task-execution; do + if echo "$RESPONSE" | items_contain_id "$func_id"; then + pass "functions contains '${func_id}'" + else + fail "functions contains '${func_id}'" "not found in response" + fi + done +else + fail "GET /functions returns 200" "unexpected status code" +fi + +section "Data Access" + +# Test data access on a local perception app +assert_non_empty_items "/apps/lidar-driver/data" + +section "Configurations" + +# Test configurations on a local perception app +assert_non_empty_items "/apps/lidar-driver/configurations" + +section "Faults" + +if api_get "/faults"; then + pass "GET /faults returns 200" +else + fail "GET /faults returns 200" "unexpected status code" +fi + +section "Scripts" + +# Perception ECU scripts should include inject-sensor-failure +assert_scripts_list "perception-ecu" "inject-sensor-failure" + +# --- Summary --- + +# print_summary runs via EXIT trap; exit code reflects test results +[ "$FAIL_COUNT" -eq 0 ]