diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 67722f9..9ca7db4 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -22,7 +22,7 @@ RUN find /tmp/src -type f \! -name "package.xml" -print | xargs rm -rf FROM base AS depbuilder # copy the reduced source tree (only package.xml) from previous stage COPY --from=sourcefilter /tmp/src /tmp/src -RUN rosdep update --rosdistro ${ROS_DISTRO} && apt-get update +RUN rosdep update --rosdistro ${ROS_DISTRO} && apt-get update && apt-get upgrade --no-install-recommends -y RUN cd /tmp/src && rosdep install --from-paths . --ignore-src -r -y && cd && rm -rf /tmp/src FROM depbuilder AS final diff --git a/.devcontainer/post-create.sh b/.devcontainer/post-create.sh index 56bd180..2ecc236 100755 --- a/.devcontainer/post-create.sh +++ b/.devcontainer/post-create.sh @@ -17,6 +17,11 @@ function add_git_config_if_not_exist { add_config_if_not_exist "source /opt/ros/$ROS_DISTRO/setup.bash" +rosdep update --rosdistro $ROS_DISTRO + +sudo apt-get update && sudo apt-get upgrade --no-install-recommends -y +rosdep install --from-paths src/ --ignore-src -r -y + source /opt/ros/$ROS_DISTRO/setup.bash colcon build --symlink-install --continue-on-error || true diff --git a/.github/workflows/ros-ci.yml b/.github/workflows/ros-ci.yml index ede905c..293066b 100644 --- a/.github/workflows/ros-ci.yml +++ b/.github/workflows/ros-ci.yml @@ -49,11 +49,7 @@ jobs: container: image: ${{ matrix.docker_image }} steps: - - uses: actions/checkout@v5 - with: - fetch-depth: 1 - token: ${{ secrets.ORGA_GH_TOKEN }} - + - uses: actions/checkout@v3 - name: setup ROS environment uses: LCAS/setup-ros@master with: @@ -69,6 +65,6 @@ jobs: if: ${{ matrix.ros_version == 2 }} uses: ros-tooling/action-ros-ci@v0.3 with: - import-token: ${{ secrets.ORGA_GH_TOKEN }} + import-token: ${{ github.token }} target-ros2-distro: ${{ matrix.ros_distribution }} skip-tests: true diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index fbad70b..0000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "src/RobotStateMachine"] - path = src/RobotStateMachine - url = https://github.com/LCAS/RobotStateMachine.git diff --git a/ARCHITECTURE_INTEGRATION.md b/ARCHITECTURE_INTEGRATION.md new file mode 100644 index 0000000..fde73fe --- /dev/null +++ b/ARCHITECTURE_INTEGRATION.md @@ -0,0 +1,1251 @@ +# Concept Architecture: Integration of Sentor, RobotStateMachine, and Nav2 + +## Executive Summary + +This document outlines the architectural design for integrating three critical systems to ensure safe and compliant autonomous navigation: +1. **Sentor** - Safety monitoring and heartbeat system +2. **RobotStateMachine** - State management for robot operational modes +3. **Nav2** - Autonomous navigation stack + +The core safety requirement is that autonomous navigation shall only occur when: +``` +/robot_state == "active" AND /autonomous_mode == true +``` + +Any violation of this condition must immediately stop robot motion and terminate active navigation goals. + +--- + +## Table of Contents + +1. [System Overview](#system-overview) +2. [Component Responsibilities](#component-responsibilities) +3. [Integration Architecture](#integration-architecture) +4. [Safety-Critical Topics and Interfaces](#safety-critical-topics-and-interfaces) +5. [State Transition Handling](#state-transition-handling) +6. [Nav2 Integration Strategies](#nav2-integration-strategies) +7. [Emergency Stop Behavior](#emergency-stop-behavior) +8. [Implementation Recommendations](#implementation-recommendations) +9. [Testing and Validation Strategy](#testing-and-validation-strategy) +10. [Failure Modes and Mitigation](#failure-modes-and-mitigation) +11. [Sentor Guard Package](#sentor-guard-package) + +--- + +## System Overview + +### Architecture Diagram + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ System Architecture │ +├─────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌──────────────────┐ ┌──────────────────┐ │ +│ │ RobotStateMachine│ │ Sentor │ │ +│ │ │ │ │ │ +│ │ - State Mgmt │ │ - Safety Mon │ │ +│ │ - Mode Mgmt │ │ - Heartbeats │ │ +│ └────────┬─────────┘ └────────┬─────────┘ │ +│ │ │ │ +│ │ /robot_state │ /safety/heartbeat │ +│ │ /autonomous_mode │ /warning/heartbeat │ +│ │ │ │ +│ └──────────┬──────────────┘ │ +│ ↓ │ +│ ┌─────────────────────┐ │ +│ │ Safety Controller │ (New Component) │ +│ │ Nav2 Lifecycle Mgr │ │ +│ └──────────┬──────────┘ │ +│ │ │ +│ │ Control signals │ +│ ↓ │ +│ ┌─────────────────────┐ │ +│ │ Nav2 Stack │ │ +│ │ │ │ +│ │ - BT Navigator │ │ +│ │ - Controller │ │ +│ │ - Planner │ │ +│ └─────────────────────┘ │ +│ │ │ +│ ↓ │ +│ ┌─────────────────────┐ │ +│ │ Robot Base │ │ +│ │ (cmd_vel consumer) │ │ +│ └─────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Component Responsibilities + +### 1. RobotStateMachine + +**Repository**: https://github.com/LCAS/RobotStateMachine + +**Responsibilities**: +- Publish current robot operational state on `/robot_state` (e.g., "active", "paused", "emergency_stop", "idle") +- Publish autonomous mode status on `/autonomous_mode` (Boolean) +- Manage state transitions based on operator input, system events, and safety conditions +- Coordinate with safety systems through service calls or action interfaces + +**Key Topics Published**: +- `/robot_state` (std_msgs/String or custom msg): Current robot state +- `/autonomous_mode` (std_msgs/Bool): Whether autonomous operation is enabled + +**Key Services Provided**: +- State transition requests (e.g., SetState, SetAutonomousMode) + +### 2. Sentor + +**Current Repository**: LCAS/sentor + +**Responsibilities**: +- Monitor critical topics and nodes for health and safety +- Publish safety heartbeat (`/safety/heartbeat`) for safety-critical systems +- Publish warning heartbeat (`/warning/heartbeat`) for autonomy-critical systems +- Trigger safety responses when monitored conditions fail +- Provide override services for manual safety intervention + +**Key Topics Published**: +- `/safety/heartbeat` (std_msgs/Bool): TRUE when all safety-critical monitors pass +- `/warning/heartbeat` (std_msgs/Bool): TRUE when all autonomy-critical monitors pass + +**Key Services Provided**: +- `/sentor/override_safety` (SetBool): Manual safety override +- `/sentor/override_warning` (SetBool): Manual warning override + +### 3. Nav2 Stack + +**Documentation**: https://docs.nav2.org/ + +**Responsibilities**: +- Execute autonomous navigation tasks +- Follow paths while avoiding obstacles +- Respond to preemption and cancellation requests +- Maintain behavior trees for navigation logic + +**Key Topics Subscribed**: +- `/goal_pose` (geometry_msgs/PoseStamped): Navigation goals + +**Key Topics Published**: +- `/cmd_vel` (geometry_msgs/Twist): Velocity commands to robot base + +**Key Actions**: +- `NavigateToPose`: Navigate to a specific pose +- `FollowPath`: Follow a pre-computed path + +### 4. Safety Controller Node (NEW) + +**Proposed New Component** + +**Responsibilities**: +- Subscribe to `/robot_state`, `/autonomous_mode`, `/safety/heartbeat`, `/warning/heartbeat` +- Compute combined safety condition: `robot_state == "active" AND autonomous_mode == true` +- Control Nav2 lifecycle states (activate/deactivate/pause) +- Cancel active navigation goals when safety conditions become invalid +- Optionally gate `cmd_vel` commands as a last-resort safety measure + +**Implementation Options**: See [Nav2 Integration Strategies](#nav2-integration-strategies) + +--- + +## Integration Architecture + +### Information Flow + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ Information Flow │ +└──────────────────────────────────────────────────────────────────┘ + + RobotStateMachine Sentor Safety Controller + │ │ │ + │ /robot_state │ │ + ├─────────────────────────────────────────────────>│ + │ /autonomous_mode │ │ + ├─────────────────────────────────────────────────>│ + │ │ /safety/heartbeat │ + │ ├──────────────────────────>│ + │ │ /warning/heartbeat │ + │ ├──────────────────────────>│ + │ │ │ + │ │ [Evaluates: │ + │ │ state+mode+ │ + │ │ heartbeats] │ + │ │ │ + │ │ IF SAFE: │ + │ │ - Activate Nav2 │ + │ │ - Allow navigation │ + │ │ │ + │ │ IF UNSAFE: │ + │ │ - Pause Nav2 │ + │ │ - Cancel goals │ + │ │ - Gate cmd_vel │ + │ │ │ + │ │ ├─────> + │ │ │ Nav2 + │ │ │ - Lifecycle + │ │ │ - Goal Cancel +``` + +### Key Integration Points + +1. **Topic Subscriptions**: Safety Controller subscribes to all decision-making topics +2. **Nav2 Lifecycle Management**: Safety Controller uses Nav2 lifecycle services +3. **Goal Cancellation**: Safety Controller can cancel navigation actions +4. **Velocity Gating**: Optional safety layer that can zero out cmd_vel + +--- + +## Safety-Critical Topics and Interfaces + +### Required Topics + +| Topic | Type | Publisher | Purpose | +|-------|------|-----------|---------| +| `/robot_state` | std_msgs/String (or custom) | RobotStateMachine | Current operational state | +| `/autonomous_mode` | std_msgs/Bool | RobotStateMachine | Autonomous mode flag | +| `/safety/heartbeat` | std_msgs/Bool | Sentor | Safety-critical system health | +| `/warning/heartbeat` | std_msgs/Bool | Sentor | Autonomy-critical system health | +| `/cmd_vel` | geometry_msgs/Twist | Nav2 | Velocity commands (monitored/gated) | + +### Recommended Topics for Monitoring + +The following topics should be configured in Sentor's monitoring configuration: + +1. **Navigation Stack Topics** (autonomy_critical: true): + - `/odom` - Odometry feed + - `/scan` or `/lidar` - Obstacle detection sensors + - `/map` - Localization map + - `/amcl_pose` or `/pose` - Localization output + +2. **Critical Sensor Topics** (safety_critical: true): + - Emergency stop button status + - Battery voltage/state + - Motor controller status + - Critical safety sensors + +3. **Node Monitors** (autonomy_critical: true): + - Nav2 controller server + - Nav2 planner server + - Nav2 behavior server + - Localization node (AMCL or other) + +### Required Services + +| Service | Type | Provider | Purpose | +|---------|------|----------|---------| +| Nav2 Lifecycle Services | lifecycle_msgs/srv/ChangeState | Nav2 nodes | Control Nav2 node states | +| Goal Cancellation | action_msgs/srv/CancelGoal | Nav2 action servers | Stop active navigation | +| State Transition | custom_msgs/srv/SetState | RobotStateMachine | Change robot state | + +--- + +## State Transition Handling + +### Valid States for Autonomous Navigation + +Only the following condition permits autonomous navigation: +```python +safe_to_navigate = (robot_state == "active" and + autonomous_mode == True and + safety_heartbeat == True and + warning_heartbeat == True) +``` + +### State Transition Scenarios + +#### Scenario 1: Normal Activation +``` +Initial: robot_state="idle", autonomous_mode=false +1. Operator enables autonomous mode → autonomous_mode=true +2. Operator activates robot → robot_state="active" +3. All monitors healthy → safety_heartbeat=true, warning_heartbeat=true +4. Safety Controller activates Nav2 → Navigation enabled +``` + +#### Scenario 2: Emergency Stop During Navigation +``` +Active Navigation: robot_state="active", autonomous_mode=true +1. Emergency stop pressed → robot_state="emergency_stop" +2. Safety Controller detects state change (< 100ms) +3. Immediate actions: + a. Cancel all active Nav2 goals + b. Transition Nav2 to inactive lifecycle state + c. Publish zero velocity to cmd_vel (if gating enabled) +4. Result: Robot stops immediately, navigation preempted +``` + +#### Scenario 3: Sensor Failure During Navigation +``` +Active Navigation: All conditions satisfied +1. Critical sensor stops publishing → Sentor detects failure +2. warning_heartbeat → false (autonomy_critical sensor failed) +3. Safety Controller detects heartbeat change +4. Immediate actions: + a. Cancel active navigation goal + b. Pause Nav2 (optional: deactivate) + c. Gate cmd_vel to zero +5. Result: Robot stops, waits for recovery or manual intervention +``` + +#### Scenario 4: Mode Change During Navigation +``` +Active Navigation: robot_state="active", autonomous_mode=true +1. Operator switches to manual mode → autonomous_mode=false +2. Safety Controller detects mode change +3. Immediate actions: + a. Cancel active navigation goal + b. Deactivate Nav2 or transition to paused +4. Result: Robot stops autonomous navigation, ready for manual control +``` + +#### Scenario 5: Recovery After Fault +``` +Stopped: warning_heartbeat=false (sensor recovered) +1. Sensor resumes normal operation +2. Sentor detects recovery after safe_operation_timeout +3. warning_heartbeat → true +4. Safety Controller observes all conditions satisfied +5. Safety Controller reactivates Nav2 → Navigation can resume +Note: Navigation goals are NOT automatically reissued; operator or higher-level + planner must send new goals. +``` + +### Reaction Time Requirements + +- **State/Mode Change Detection**: < 100ms +- **Goal Cancellation**: < 200ms +- **Velocity Command Gating**: < 50ms +- **Total Stop Time**: < 500ms from trigger to zero motion + +--- + +## Nav2 Integration Strategies + +There are multiple approaches to integrate safety conditions with Nav2. We recommend a layered approach combining lifecycle management and behavior tree integration. + +### Strategy 1: Lifecycle Management (RECOMMENDED) + +**Approach**: Control Nav2 node lifecycle states based on safety conditions. + +**Implementation**: +1. Safety Controller subscribes to all safety topics +2. When safe_to_navigate becomes FALSE: + - Call lifecycle transition services to deactivate Nav2 nodes + - Cancel any active navigation actions +3. When safe_to_navigate becomes TRUE: + - Activate Nav2 nodes to ready state + - Allow new navigation goals + +**Pros**: +- Clean separation of concerns +- Well-defined ROS2 lifecycle pattern +- Nav2 fully aware of activation/deactivation +- No modification to Nav2 required + +**Cons**: +- Lifecycle transitions take 100-500ms +- Need to manage state of multiple Nav2 nodes +- May be too slow for immediate emergency stops + +**Example Lifecycle States**: +``` +INACTIVE → ACTIVE: When safe_to_navigate becomes true +ACTIVE → INACTIVE: When safe_to_navigate becomes false +``` + +### Strategy 2: Behavior Tree Plugin (RECOMMENDED for Fine-Grained Control) + +**Approach**: Create custom BT condition nodes that check safety conditions. + +**Implementation**: +1. Create custom Nav2 BT plugin: `CheckSafetyCondition` +2. Plugin subscribes to `/robot_state`, `/autonomous_mode`, heartbeats +3. BT returns FAILURE when safety conditions invalid +4. Nav2 behavior tree configured with condition checks at strategic points + +**Pros**: +- Fine-grained control within navigation logic +- Fast response (BT ticks at ~10-20Hz) +- Integrates naturally with Nav2 architecture +- Can handle different safety levels differently + +**Cons**: +- Requires custom Nav2 plugin development +- Must modify Nav2 behavior tree XML +- Safety logic distributed between Safety Controller and BT + +**Example BT Structure**: +```xml + + + + + + + + + +``` + +### Strategy 3: Action Server Wrapper (ALTERNATIVE) + +**Approach**: Wrap Nav2 action servers with safety-aware proxy action servers. + +**Implementation**: +1. Create proxy action server for NavigateToPose +2. Proxy checks safety conditions before forwarding goals to Nav2 +3. Proxy monitors conditions during execution, cancels if invalid +4. Higher-level planners call proxy instead of Nav2 directly + +**Pros**: +- No modification to Nav2 +- Centralized safety logic +- Can add additional functionality (logging, metrics) + +**Cons**: +- Additional latency from proxy layer +- Complexity of maintaining action state +- Must implement proxy for each action type + +### Strategy 4: cmd_vel Filter Node (LAST RESORT SAFETY) + +**Approach**: Final safety gate that can zero out velocity commands. + +**Implementation**: +1. Place filter node between Nav2 and robot base +2. Filter subscribes to safety condition topics +3. Filter passes through cmd_vel when safe, zeros it when unsafe +4. Acts as hardware-level safety cutoff + +**Pros**: +- Immediate response (< 50ms) +- Works regardless of Nav2 state +- Last line of defense +- Simple implementation + +**Cons**: +- Doesn't provide feedback to Nav2 (Nav2 thinks it's still navigating) +- Can cause confusion in Nav2 state machine +- Should be used in addition to, not instead of, proper integration + +### Recommended Hybrid Approach + +**Combine multiple strategies for defense-in-depth**: + +1. **Primary**: Lifecycle Management (Strategy 1) + - Activate/deactivate Nav2 based on safety conditions + - Provides clean state transitions + +2. **Secondary**: Behavior Tree Integration (Strategy 2) + - Add safety condition checks in BT for faster response + - Allows graceful handling within navigation logic + +3. **Tertiary**: cmd_vel Filter (Strategy 4) + - Emergency safety gate as last resort + - Ensures robot never moves when unsafe, even if primary/secondary fail + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Multi-Layer Safety Architecture │ +├─────────────────────────────────────────────────────────────┤ +│ │ +│ Layer 1: Safety Controller (Lifecycle Management) │ +│ └─> Activates/Deactivates Nav2 nodes │ +│ └─> Cancels navigation goals │ +│ │ +│ Layer 2: Nav2 Behavior Tree (Condition Checks) │ +│ └─> Safety conditions checked in BT │ +│ └─> Fails gracefully when conditions invalid │ +│ │ +│ Layer 3: cmd_vel Filter (Emergency Gate) │ +│ └─> Zeros velocity commands when unsafe │ +│ └─> Last line of defense │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` + +--- + +## Emergency Stop Behavior + +### Requirements + +When any safety condition becomes invalid, the system must: + +1. **Immediately stop motion** (< 500ms total latency) +2. **Cancel all active navigation goals** +3. **Prevent new navigation goals from starting** +4. **Report failure status** appropriately to calling systems +5. **Log the event** for debugging and safety analysis + +### Implementation Sequence + +``` +┌────────────────────────────────────────────────────────────┐ +│ Emergency Stop Sequence │ +└────────────────────────────────────────────────────────────┘ + +1. Safety Condition Invalid Detected (t=0ms) + ↓ +2. Safety Controller Triggered (t < 50ms) + ├─> Cancel all active Nav2 action goals + ├─> Publish zero cmd_vel (if filtering enabled) + └─> Transition Nav2 lifecycle to INACTIVE + ↓ +3. Nav2 Receives Cancellation (t < 200ms) + ├─> Behavior Tree preempted + ├─> Local planner stops generating paths + └─> Controller stops publishing cmd_vel + ↓ +4. Robot Motion Stops (t < 500ms) + └─> Velocity commands cease + ↓ +5. System in Safe State + ├─> Navigation disabled + ├─> Robot stationary + └─> Waiting for safety conditions to be restored +``` + +### Recovery Procedure + +After safety conditions are restored: + +1. **Validation Period**: Wait for Sentor's `safe_operation_timeout` (default: 10s) +2. **Heartbeat Confirmation**: Ensure heartbeats remain TRUE +3. **State Verification**: Confirm robot_state == "active" and autonomous_mode == true +4. **Nav2 Reactivation**: Transition Nav2 nodes back to ACTIVE +5. **Ready for Commands**: System ready to accept new navigation goals + +**Important**: The system should NOT automatically resume interrupted navigation. The higher-level planner or operator must explicitly send a new navigation goal. + +--- + +## Implementation Recommendations + +### Phase 1: Safety Controller Node Development + +**Priority**: HIGH +**Dependencies**: None + +**Tasks**: +1. Create new ROS2 package: `sentor_safety_controller` +2. Implement Safety Controller node with: + - Subscriptions to all safety-critical topics + - Logic to evaluate combined safety condition + - Publisher for aggregated safety status (optional, for monitoring) + - Service client for Nav2 lifecycle management + - Action client for goal cancellation +3. Add configurable parameters: + - Topic names for flexibility + - Reaction time thresholds + - Logging verbosity +4. Implement comprehensive logging for all state changes + +**Example Configuration**: +```yaml +safety_controller: + ros__parameters: + robot_state_topic: "/robot_state" + autonomous_mode_topic: "/autonomous_mode" + safety_heartbeat_topic: "/safety/heartbeat" + warning_heartbeat_topic: "/warning/heartbeat" + + nav2_controller_node: "/controller_server" + nav2_planner_node: "/planner_server" + nav2_bt_navigator_node: "/bt_navigator" + + reaction_time_threshold: 0.1 # seconds + enable_cmd_vel_filter: true + + expected_active_state: "active" # Expected value for robot_state +``` + +### Phase 2: Sentor Configuration Enhancement + +**Priority**: HIGH +**Dependencies**: Understanding of Nav2 deployment + +**Tasks**: +1. Create reference Sentor configuration for Nav2 integration +2. Define monitoring rules for: + - Navigation stack nodes (autonomy_critical: true) + - Critical sensors (safety_critical: true) + - Localization topics (autonomy_critical: true) +3. Set appropriate timeouts and thresholds +4. Document configuration guidelines + +**Example Sentor Configuration Snippet**: +```yaml +monitors: + - name: "/scan" + message_type: "sensor_msgs/msg/LaserScan" + rate: 10.0 + signal_when: + condition: "published" + timeout: 1.0 + autonomy_critical: true + tags: ["navigation", "obstacle_detection"] + +node_monitors: + - name: "/controller_server" + timeout: 2.0 + autonomy_critical: true + tags: ["nav2", "controller"] + + - name: "/planner_server" + timeout: 2.0 + autonomy_critical: true + tags: ["nav2", "planner"] +``` + +### Phase 3: Nav2 Behavior Tree Plugin (Optional but Recommended) + +**Priority**: MEDIUM +**Dependencies**: Phase 1 complete + +**Tasks**: +1. Create custom Nav2 BT plugin package: `sentor_nav2_bt_plugins` +2. Implement `CheckSafetyCondition` BT node +3. Implement `CheckRobotState` BT node +4. Create example BT XML configurations +5. Document BT integration patterns + +### Phase 4: cmd_vel Filter Node (Safety Backup) + +**Priority**: MEDIUM +**Dependencies**: Phase 1 complete + +**Tasks**: +1. Create velocity filter node package: `sentor_velocity_filter` +2. Implement filter with: + - Input: cmd_vel from Nav2 + - Output: filtered cmd_vel to robot base + - Safety condition checking + - Configurable ramping for smooth stops +3. Add telemetry and diagnostics +4. Test with various robot bases + +### Phase 5: Integration Testing Framework + +**Priority**: HIGH +**Dependencies**: Phases 1-4 + +**Tasks**: +1. Create simulation environment (Gazebo/Ignition) +2. Implement test scenarios for each failure mode +3. Develop automated test suite +4. Create validation metrics and dashboards +5. Document test procedures and acceptance criteria + +### Phase 6: Documentation and Training + +**Priority**: MEDIUM +**Dependencies**: All phases + +**Tasks**: +1. Create integration guide for robot deployers +2. Document configuration templates +3. Write troubleshooting guide +4. Create training materials +5. Produce video tutorials + +--- + +## Testing and Validation Strategy + +### Test Categories + +#### 1. Unit Tests + +Test individual components in isolation: +- Safety Controller logic (condition evaluation) +- Topic callback handling +- Service call mechanisms +- State machine transitions + +#### 2. Integration Tests + +Test component interactions: +- Safety Controller ↔ Nav2 lifecycle +- Safety Controller ↔ RobotStateMachine +- Sentor ↔ Safety Controller +- End-to-end safety condition propagation + +#### 3. Scenario Tests + +Test real-world scenarios: +- Normal navigation operation +- Emergency stop during motion +- Sensor failure recovery +- Mode switching during navigation +- Multiple simultaneous failures + +#### 4. Performance Tests + +Validate timing requirements: +- Reaction time measurements (< 100ms target) +- End-to-end stop time (< 500ms target) +- System latency under load +- Resource utilization + +### Test Scenarios + +#### Scenario 1: Emergency Stop During Navigation + +**Setup**: +- Robot navigating autonomously +- All safety conditions satisfied + +**Trigger**: Simulate emergency stop button press (robot_state → "emergency_stop") + +**Expected Behavior**: +1. Safety Controller detects state change within 100ms +2. Navigation goal cancelled within 200ms +3. Robot motion stops within 500ms +4. Nav2 in inactive state +5. Event logged with timestamp + +**Validation**: +- Record all timestamps +- Verify no cmd_vel published after stop +- Verify Nav2 action status reported as ABORTED or PREEMPTED + +#### Scenario 2: Critical Sensor Failure + +**Setup**: +- Robot navigating autonomously +- All safety conditions satisfied + +**Trigger**: Stop publishing on critical sensor topic (e.g., /scan) + +**Expected Behavior**: +1. Sentor detects missing messages within sensor timeout +2. warning_heartbeat → false +3. Safety Controller cancels navigation +4. Robot stops +5. System waits for sensor recovery + +**Validation**: +- Verify heartbeat transitions +- Verify navigation preempted +- Verify system ready to resume after recovery + +#### Scenario 3: Autonomous Mode Disabled During Navigation + +**Setup**: +- Robot navigating autonomously + +**Trigger**: Set autonomous_mode → false + +**Expected Behavior**: +1. Safety Controller detects mode change +2. Navigation cancelled +3. Robot stops +4. Manual control enabled + +**Validation**: +- Verify mode change detected +- Verify clean shutdown of navigation +- Verify manual control commands work + +#### Scenario 4: Recovery After Transient Failure + +**Setup**: +- System in stopped state due to sensor failure +- Sensor recovers and resumes publishing + +**Expected Behavior**: +1. Sentor detects sensor recovery +2. After safe_operation_timeout, heartbeat → true +3. Safety Controller enables Nav2 +4. System ready for new navigation goals + +**Validation**: +- Verify timeout period honored +- Verify Nav2 properly reactivated +- Verify new goals can be executed + +### Validation Metrics + +| Metric | Target | Critical | +|--------|--------|----------| +| State change detection latency | < 100ms | YES | +| Goal cancellation latency | < 200ms | YES | +| Total stop time | < 500ms | YES | +| False positive rate | < 0.1% | NO | +| System availability | > 99.9% | NO | +| Recovery time after transient fault | < 15s | NO | + +### Test Environment Setup + +**Simulation**: +- Use Gazebo or Ignition with Nav2-compatible robot +- Implement mock RobotStateMachine node +- Configure Sentor with test monitors +- Create test scenarios with scripted triggers + +**Hardware**: +- Test on actual robot platform +- Use real sensors and safety systems +- Validate timing on target compute platform +- Test with actual emergency stop hardware + +--- + +## Failure Modes and Mitigation + +### Failure Mode 1: Safety Controller Node Crash + +**Symptom**: Safety Controller stops running during navigation + +**Risk**: Robot continues navigating without safety oversight + +**Mitigation**: +1. **Watchdog**: Implement watchdog that monitors Safety Controller heartbeat +2. **Failsafe**: Configure Nav2 with conservative behavior (lower speeds, larger safety margins) +3. **Redundancy**: Run multiple Safety Controller instances with different priorities +4. **Monitoring**: Add Safety Controller to Sentor node monitors as safety_critical + +### Failure Mode 2: Topic Communication Failure + +**Symptom**: Safety topics not received by Safety Controller + +**Risk**: Stale safety data leads to incorrect decisions + +**Mitigation**: +1. **Timeouts**: Implement message age checks, treat old data as invalid +2. **QoS Settings**: Use reliable QoS for safety-critical topics +3. **Monitoring**: Monitor Safety Controller's subscription health +4. **Failsafe**: Default to unsafe state if no recent messages + +### Failure Mode 3: Nav2 Lifecycle Service Failure + +**Symptom**: Lifecycle service calls fail or timeout + +**Risk**: Nav2 remains active when it should be deactivated + +**Mitigation**: +1. **Retry Logic**: Implement retries with exponential backoff +2. **cmd_vel Filter**: Fallback to velocity filtering if lifecycle fails +3. **Escalation**: Trigger system-level emergency stop if repeated failures +4. **Monitoring**: Log all service call failures for analysis + +### Failure Mode 4: Race Condition Between State Changes + +**Symptom**: Rapid state changes cause inconsistent safety decisions + +**Risk**: Brief periods where robot state and safety state mismatch + +**Mitigation**: +1. **State Machine**: Implement proper state machine in Safety Controller +2. **Debouncing**: Add short debounce period for state changes (e.g., 50ms) +3. **Locking**: Use thread-safe state access +4. **Prioritization**: Emergency stop always takes precedence + +### Failure Mode 5: RobotStateMachine Publishing Incorrect State + +**Symptom**: robot_state doesn't reflect actual robot condition + +**Risk**: Safety system makes decisions on false information + +**Mitigation**: +1. **Sentor Monitoring**: Add RobotStateMachine node to node_monitors +2. **Redundancy**: Cross-check state with other sensors (e.g., motor controller status) +3. **Validation**: Implement state validation checks (e.g., can't be "active" if motors disabled) +4. **Override**: Provide manual override capability + +### Failure Mode 6: Network Congestion or Delays + +**Symptom**: Safety messages delayed beyond acceptable latency + +**Risk**: Delayed reaction to dangerous conditions + +**Mitigation**: +1. **QoS Configuration**: Use appropriate QoS profiles (deadline, liveliness) +2. **Priority**: Use DDS priority settings for safety-critical topics +3. **Dedicated Network**: Consider dedicated network for safety communications +4. **Monitoring**: Monitor network latency and topic timing + +### Failure Mode 7: Partial Nav2 Deactivation + +**Symptom**: Some Nav2 nodes deactivate but others remain active + +**Risk**: Inconsistent Nav2 state, potential for unexpected behavior + +**Mitigation**: +1. **Atomic Operations**: Group lifecycle transitions where possible +2. **State Verification**: Verify all nodes reach expected state +3. **Rollback**: Roll back partial transitions +4. **cmd_vel Filter**: Rely on velocity filtering as backup + +--- + +## Appendix A: Message and Service Definitions + +### Custom Messages (if needed) + +#### RobotState.msg +``` +# Custom message for robot state (alternative to std_msgs/String) +string state # e.g., "idle", "active", "paused", "emergency_stop" +time timestamp # When state was entered +string previous_state # Previous state for debugging +uint32 state_count # Number of state transitions +``` + +#### SafetyStatus.msg +``` +# Aggregated safety status from Safety Controller +bool safe_to_navigate +string robot_state +bool autonomous_mode +bool safety_heartbeat +bool warning_heartbeat +time last_update +string blocking_condition # Which condition is false, if any +``` + +### Service Definitions + +Most services can use standard ROS2 interfaces: +- `std_srvs/SetBool` - For simple enable/disable +- `lifecycle_msgs/ChangeState` - For Nav2 lifecycle +- `action_msgs/CancelGoal` - For cancelling navigation + +--- + +## Appendix B: Configuration Templates + +### Safety Controller Launch File + +```python +# sentor_safety_controller_launch.py +from launch import LaunchDescription +from launch_ros.actions import Node +from launch.actions import DeclareLaunchArgument +from launch.substitutions import LaunchConfiguration + +def generate_launch_description(): + return LaunchDescription([ + DeclareLaunchArgument( + 'robot_state_topic', + default_value='/robot_state', + description='Topic for robot state' + ), + DeclareLaunchArgument( + 'autonomous_mode_topic', + default_value='/autonomous_mode', + description='Topic for autonomous mode flag' + ), + + Node( + package='sentor_safety_controller', + executable='safety_controller_node', + name='safety_controller', + output='screen', + parameters=[{ + 'robot_state_topic': LaunchConfiguration('robot_state_topic'), + 'autonomous_mode_topic': LaunchConfiguration('autonomous_mode_topic'), + 'safety_heartbeat_topic': '/safety/heartbeat', + 'warning_heartbeat_topic': '/warning/heartbeat', + 'enable_cmd_vel_filter': True, + 'reaction_time_threshold': 0.1, + }] + ), + ]) +``` + +### Complete System Launch + +```python +# sentor_nav2_system_launch.py +from launch import LaunchDescription +from launch.actions import IncludeLaunchDescription +from launch.launch_description_sources import PythonLaunchDescriptionSource +from launch_ros.actions import Node + +def generate_launch_description(): + return LaunchDescription([ + # Launch RobotStateMachine + IncludeLaunchDescription( + PythonLaunchDescriptionSource('path/to/robot_state_machine_launch.py') + ), + + # Launch Sentor + IncludeLaunchDescription( + PythonLaunchDescriptionSource('path/to/sentor_launch.py'), + launch_arguments={ + 'config_file': 'path/to/nav2_sentor_config.yaml', + }.items() + ), + + # Launch Safety Controller + IncludeLaunchDescription( + PythonLaunchDescriptionSource('path/to/safety_controller_launch.py') + ), + + # Launch Nav2 + IncludeLaunchDescription( + PythonLaunchDescriptionSource('path/to/nav2_bringup_launch.py') + ), + + # Optional: Launch cmd_vel filter + Node( + package='sentor_velocity_filter', + executable='velocity_filter_node', + name='velocity_filter', + remappings=[ + ('cmd_vel_in', '/cmd_vel'), + ('cmd_vel_out', '/cmd_vel_filtered'), + ] + ), + ]) +``` + +### Sentor Configuration for Nav2 + +```yaml +# nav2_sentor_config.yaml +monitors: + # Critical Navigation Sensors + - name: "/scan" + message_type: "sensor_msgs/msg/LaserScan" + rate: 10.0 + N: 5 + signal_when: + condition: "published" + timeout: 1.0 + autonomy_critical: true + tags: ["nav2", "sensor", "lidar"] + + - name: "/odom" + message_type: "nav_msgs/msg/Odometry" + rate: 20.0 + N: 10 + signal_when: + condition: "published" + timeout: 0.5 + autonomy_critical: true + tags: ["nav2", "odometry"] + + - name: "/amcl_pose" + message_type: "geometry_msgs/msg/PoseWithCovarianceStamped" + rate: 10.0 + signal_when: + condition: "published" + timeout: 1.0 + autonomy_critical: true + tags: ["nav2", "localization"] + + # Safety-Critical Sensors + - name: "/emergency_stop" + message_type: "std_msgs/msg/Bool" + rate: 5.0 + signal_lambdas: + - expression: "lambda x: x.data == False" # False means NOT stopped + timeout: 0.5 + safety_critical: true + tags: ["safety", "estop"] + +node_monitors: + # Nav2 Nodes + - name: "/controller_server" + timeout: 2.0 + autonomy_critical: true + poll_rate: 2.0 + tags: ["nav2", "controller"] + + - name: "/planner_server" + timeout: 2.0 + autonomy_critical: true + poll_rate: 2.0 + tags: ["nav2", "planner"] + + - name: "/bt_navigator" + timeout: 2.0 + autonomy_critical: true + poll_rate: 2.0 + tags: ["nav2", "bt"] + + - name: "/amcl" + timeout: 2.0 + autonomy_critical: true + poll_rate: 2.0 + tags: ["nav2", "localization"] + + # Safety-Critical System Nodes + - name: "/robot_state_machine" + timeout: 2.0 + safety_critical: true + poll_rate: 2.0 + tags: ["safety", "state_machine"] + + - name: "/safety_controller" + timeout: 2.0 + safety_critical: true + poll_rate: 2.0 + tags: ["safety", "controller"] +``` + +--- + +## Appendix C: References and Related Documentation + +### External Resources + +1. **Nav2 Documentation**: https://docs.nav2.org/ + - Lifecycle management: https://docs.nav2.org/configuration/packages/configuring-lifecycle.html + - Behavior Trees: https://docs.nav2.org/behavior_trees/index.html + +2. **RobotStateMachine**: https://github.com/LCAS/RobotStateMachine + - State machine implementation and interfaces + +3. **ROS2 Lifecycle**: https://design.ros2.org/articles/node_lifecycle.html + - Understanding managed nodes + +4. **ROS2 QoS**: https://docs.ros.org/en/rolling/Concepts/About-Quality-of-Service-Settings.html + - Reliability, durability, and deadline policies for safety-critical topics + +### Related Standards + +1. **ISO 13849** - Safety of machinery +2. **IEC 61508** - Functional safety of electrical/electronic systems +3. **ISO 10218** - Robots and robotic devices (if applicable) + +### Internal Documentation + +1. Sentor README: `/README.md` +2. Sentor Wiki: https://github.com/LCAS/sentor/wiki/sentor + +--- + +## Appendix D: Glossary + +| Term | Definition | +|------|------------| +| **Active State** | Robot operational state where autonomous navigation is permitted | +| **Autonomous Mode** | Flag indicating whether autonomous control is enabled | +| **Behavior Tree (BT)** | Tree structure used by Nav2 for navigation logic | +| **Heartbeat** | Periodic signal indicating system health | +| **Lifecycle Node** | ROS2 managed node with defined state transitions | +| **Safety-Critical** | Systems or conditions whose failure could cause harm | +| **Autonomy-Critical** | Systems or conditions required for autonomous operation | +| **Safe-to-Navigate** | Combined condition permitting autonomous navigation | +| **Emergency Stop** | Immediate halt of all robot motion | +| **QoS** | Quality of Service policies for ROS2 communication | + +--- + +## Sentor Guard Package + +In addition to the centralized Safety Controller approach, a complementary **`sentor_guard`** package provides reusable libraries and nodes for implementing safety at multiple levels: + +### Package Components + +1. **Software Context Guards** + - Python and C++ guard libraries + - Used as context managers or explicit waits in application code + - Blocks execution until safety conditions are met + - Provides non-blocking checks for periodic operations + +2. **Topic Guard Nodes** + - Transparent topic forwarding with safety gating + - Only passes messages when conditions are satisfied + - No code changes required in existing systems + - Useful for filtering cmd_vel and other command topics + +3. **Lifecycle Guard Nodes** + - Manages lifecycle state of other nodes based on safety conditions + - Automatically activates/deactivates managed nodes + - Configurable through ROS parameters + +### Integration Approaches + +The sentor_guard package enables multiple integration patterns: + +- **Centralized**: Safety Controller uses guard libraries for condition checking +- **Distributed**: Individual nodes use guards locally for defense in depth +- **Topic-Level**: Topic guard nodes filter command streams transparently +- **Hybrid**: Combine all approaches for maximum safety + +### Key Features + +- ROS parameter configuration with YAML examples +- Context manager pattern (Python `with` statement, C++ RAII) +- Timeout-based waiting with exceptions +- Non-blocking status checks +- Detailed blocking reason reporting +- Comprehensive examples and tests + +For complete design documentation, see [docs/SENTOR_GUARD_DESIGN.md](docs/SENTOR_GUARD_DESIGN.md). + +### Usage Example (Python) + +```python +from sentor_guard.guard import SentorGuard + +class MyRobotNode(Node): + def __init__(self): + super().__init__('my_robot') + self.guard = SentorGuard(self, required_state='active') + + def do_autonomous_action(self): + # Only executes when safe + with self.guard: + self.execute_navigation() +``` + +### Usage Example (C++) + +```cpp +#include "sentor_guard/guard.hpp" + +class MyRobotNode : public rclcpp::Node { +public: + MyRobotNode() : Node("my_robot"), guard_(shared_from_this()) {} + + void doAutonomousAction() { + // RAII guard - automatically waits + sentor_guard::SentorGuard::Guard guard(guard_); + executeNavigation(); + } + +private: + sentor_guard::SentorGuard guard_; +}; +``` + +The sentor_guard package provides the building blocks for implementing the safety strategies outlined in this architecture document, making it easy to add safety checks throughout the system. + +--- + +## Document Revision History + +| Version | Date | Author | Changes | +|---------|------|--------|---------| +| 1.0 | 2025-11-10 | GitHub Copilot | Initial concept architecture | +| 1.1 | 2025-11-16 | GitHub Copilot | Added sentor_guard package design | + +--- + +## Conclusion + +This concept architecture provides a comprehensive framework for integrating Sentor, RobotStateMachine, and Nav2 to ensure safe and compliant autonomous navigation. The key principles are: + +1. **Defense in Depth**: Multiple layers of safety (lifecycle, BT, velocity filter, software guards) +2. **Clear Responsibility**: Well-defined roles for each component +3. **Fast Response**: Sub-500ms reaction to safety violations +4. **Clean Integration**: Uses standard ROS2 patterns (lifecycle, actions, topics) +5. **Extensibility**: Framework can accommodate additional safety requirements +6. **Reusable Components**: sentor_guard package provides libraries for any ROS2 system + +The recommended implementation follows a phased approach, starting with the Safety Controller as the central coordination point, then adding additional layers for robustness. The new **sentor_guard** package provides reusable libraries and nodes that can be used both within the Safety Controller and distributed throughout the system for defense in depth. + +The system is designed to fail safe, with multiple independent mechanisms ensuring the robot stops when conditions are unsafe. The addition of software context guards allows developers to easily add safety checks at any point in their code, while topic guards provide transparent safety without code changes. + +Next steps should focus on implementing the sentor_guard package and creating a minimal viable implementation of the Safety Controller, then validating the approach in simulation before proceeding to hardware deployment. diff --git a/INTEGRATION_SUMMARY.md b/INTEGRATION_SUMMARY.md new file mode 100644 index 0000000..9fbd33d --- /dev/null +++ b/INTEGRATION_SUMMARY.md @@ -0,0 +1,340 @@ +# Integration Summary: Sentor + RobotStateMachine + Nav2 + +## Quick Reference Guide + +This document provides a high-level overview of the proposed integration architecture. For complete details, see [ARCHITECTURE_INTEGRATION.md](ARCHITECTURE_INTEGRATION.md). + +--- + +## Core Safety Requirement + +Autonomous navigation is permitted **ONLY** when: + +```python +robot_state == "active" AND autonomous_mode == True +``` + +Additionally recommended: +```python +safety_heartbeat == True AND warning_heartbeat == True +``` + +Any violation must immediately stop the robot and cancel navigation. + +--- + +## System Components + +| Component | Role | Key Output | +|-----------|------|------------| +| **RobotStateMachine** | Manages robot operational state and mode | `/robot_state`, `/autonomous_mode` | +| **Sentor** | Monitors system health | `/safety/heartbeat`, `/warning/heartbeat` | +| **Safety Controller** (NEW) | Coordinates safety conditions with Nav2 | Lifecycle management, goal cancellation | +| **sentor_guard** (NEW) | Reusable safety libraries and nodes | Python/C++ guards, topic filters, lifecycle mgmt | +| **Nav2** | Autonomous navigation | Navigation goals, motion commands | + +--- + +## Architecture Overview + +``` +RobotStateMachine ──┐ + ├──> Safety Controller ──> Nav2 ──> Robot Base +Sentor ─────────────┘ +``` + +**Safety Controller** is the new component that: +1. Subscribes to all safety condition topics +2. Controls Nav2 activation/deactivation based on conditions +3. Cancels navigation goals when conditions become invalid +4. Optionally filters velocity commands as last-resort safety + +--- + +## Integration Approach: Multi-Layer Safety + +### Layer 1: Lifecycle Management (Primary) +- Safety Controller activates/deactivates Nav2 based on safety conditions +- Uses **sentor_guard** libraries for condition checking +- Clean, well-defined ROS2 pattern +- ~100-500ms response time + +### Layer 2: Behavior Tree Integration (✅ Implemented) +- `CheckAutonomyAllowed` BT condition node checks safety in Nav2 +- Uses **sentor_guard** C++ library for condition evaluation +- Faster response (~50-100ms) with continuous monitoring +- Enables graceful pause/resume of navigation +- Integrates via standard BehaviorTree.CPP plugin mechanism +- Example BTs and launch files provided in `sentor_guard/examples/nav2_examples/` + +### Layer 3: cmd_vel Filter (Emergency Backup) +- **sentor_guard** Topic Guard node filters cmd_vel +- Zeros velocity commands when unsafe +- <50ms response time +- Last line of defense + +### Additional: Application-Level Guards +- **sentor_guard** Python/C++ libraries in user code +- Context managers and RAII patterns +- Blocks execution until safe +- Defense in depth throughout the system + +--- + +## State Transition Examples + +### Normal Operation +``` +1. autonomous_mode ← true +2. robot_state ← "active" +3. All monitors healthy (heartbeats ← true) +4. Safety Controller activates Nav2 +5. Navigation goals accepted and executed +``` + +### Emergency Stop +``` +1. Emergency button pressed → robot_state ← "emergency_stop" +2. Safety Controller detects change (< 100ms) +3. Cancels active Nav2 goals (< 200ms) +4. Optionally zeros cmd_vel (< 50ms) +5. Robot stops (< 500ms total) +``` + +### Sensor Failure +``` +1. Critical sensor stops publishing +2. Sentor detects failure → warning_heartbeat ← false +3. Safety Controller cancels navigation +4. Robot stops +5. After recovery + timeout → System ready again +``` + +--- + +## Key Timing Requirements + +| Event | Target Latency | Critical | +|-------|----------------|----------| +| State change detection | < 100ms | YES | +| Goal cancellation | < 200ms | YES | +| Total stop time | < 500ms | YES | +| Recovery after fault | < 15s | NO | + +--- + +## Implementation Phases + +### Phase 1: sentor_guard Package (HIGH PRIORITY) +- Create **`sentor_guard`** package with reusable libraries +- Implement Python guard library (context manager pattern) +- Implement C++ guard library (RAII pattern) +- Add topic guard node and lifecycle guard node +- Include configuration examples and launch files + +### Phase 2: Sentor Configuration (HIGH PRIORITY) +- Create Nav2-specific monitoring configuration +- Define which topics/nodes are safety-critical vs autonomy-critical +- Set appropriate timeouts + +### Phase 3: Safety Controller (HIGH PRIORITY) +- Create `sentor_safety_controller` package +- Use **sentor_guard** libraries for condition evaluation +- Add Nav2 lifecycle management +- Add goal cancellation capability + +### Phase 4: Nav2 BT Plugin (✅ COMPLETED) +- ✅ Created `CheckAutonomyAllowed` BT condition node +- ✅ Integrated with **sentor_guard** C++ library +- ✅ Added optional BehaviorTree.CPP dependency +- ✅ Created example behavior tree XML files +- ✅ Comprehensive integration documentation +- See: `src/sentor_guard/examples/nav2_examples/` + +### Phase 5: Testing & Validation (HIGH PRIORITY) +- Simulation testing +- Hardware validation +- Performance benchmarking + +--- + +## Sentor Configuration Example + +```yaml +monitors: + # Autonomy-critical: Required for navigation + - name: "/scan" + message_type: "sensor_msgs/msg/LaserScan" + rate: 10.0 + signal_when: + condition: "published" + timeout: 1.0 + autonomy_critical: true + + # Safety-critical: Required for safety + - name: "/emergency_stop" + message_type: "std_msgs/msg/Bool" + rate: 5.0 + signal_lambdas: + - expression: "lambda x: x.data == False" + timeout: 0.5 + safety_critical: true + +node_monitors: + - name: "/controller_server" + timeout: 2.0 + autonomy_critical: true + + - name: "/safety_controller" + timeout: 2.0 + safety_critical: true +``` + +--- + +## Failure Modes to Address + +1. **Safety Controller crashes** → Nav2 continues without oversight + - *Mitigation*: Monitor Safety Controller with Sentor, implement watchdog + +2. **Topic communication failure** → Stale safety data + - *Mitigation*: Implement message age checks, use reliable QoS + +3. **Nav2 lifecycle service fails** → Nav2 stays active + - *Mitigation*: Fallback to cmd_vel filter, implement retry logic + +4. **Race conditions** → Inconsistent state + - *Mitigation*: Proper state machine, debouncing, thread-safe access + +5. **Network congestion** → Delayed reactions + - *Mitigation*: QoS policies, DDS priorities, dedicated network + +--- + +## Key Design Principles + +1. **Defense in Depth**: Multiple independent safety layers +2. **Fail Safe**: System defaults to stopped/inactive on any failure +3. **Fast Response**: Sub-500ms reaction to safety violations +4. **Standard Patterns**: Uses ROS2 lifecycle, actions, and topics +5. **No Nav2 Modification**: Primary approach doesn't require Nav2 changes +6. **Comprehensive Logging**: All state changes logged for analysis + +--- + +## Quick Start: Using the Nav2 BT Integration + +The `CheckAutonomyAllowed` behavior tree condition node is now available for direct integration with Nav2. Here's how to use it: + +### 1. Build with BT support +```bash +# Install BehaviorTree.CPP if needed +sudo apt install ros-$ROS_DISTRO-behaviortree-cpp + +# Build sentor_guard +cd ~/ros2_ws +colcon build --packages-select sentor_guard +source install/setup.bash +``` + +### 2. Configure Nav2 to load the plugin + +Add to your `bt_navigator` parameters: +```yaml +bt_navigator: + ros__parameters: + plugin_lib_names: + # ... other Nav2 plugins ... + - sentor_guard_bt_nodes # Add this line + default_nav_to_pose_bt_xml: /path/to/your/behavior_tree.xml +``` + +### 3. Use in your behavior tree + +Simple pre-navigation check: +```xml + + + + + +``` + +Continuous monitoring with pause/resume: +```xml + + + + + + + +``` + +### 4. See complete examples + +Full working examples with launch files and documentation: +- `src/sentor_guard/examples/nav2_examples/navigate_with_guard.xml` - Complete BT with recovery +- `src/sentor_guard/examples/nav2_examples/simple_nav_with_guard.xml` - Minimal example +- `src/sentor_guard/examples/nav2_examples/README.md` - Detailed integration guide + +--- + +## Next Steps + +1. **Review and approve** this architecture concept +2. **Create Safety Controller package** with basic functionality +3. **Test in simulation** with mock RobotStateMachine +4. **Develop Sentor configuration** for your specific robot/Nav2 setup +5. **Validate timing** on target hardware +6. **Deploy incrementally** with thorough testing at each phase + +--- + +## Questions to Address + +Before implementation, clarify: + +1. **State Values**: What are the exact state strings used by RobotStateMachine? + - e.g., "active", "paused", "emergency_stop", "idle"? + +2. **Topic Names**: Confirm final topic names for: + - `/robot_state` + - `/autonomous_mode` + - Nav2 namespaces + +3. **QoS Requirements**: What reliability/durability needed for safety topics? + +4. **Hardware Platform**: What is the target compute platform? + - Affects timing validation + +5. **Nav2 Configuration**: Are there specific Nav2 customizations already in place? + +6. **Recovery Policy**: Should navigation automatically resume after recovery or wait for new goals? + - **Recommendation**: Wait for explicit new goals (safer) + +--- + +## References + +- **Full Architecture Document**: [ARCHITECTURE_INTEGRATION.md](ARCHITECTURE_INTEGRATION.md) +- **sentor_guard Package Design**: [docs/SENTOR_GUARD_DESIGN.md](docs/SENTOR_GUARD_DESIGN.md) +- **Integration Diagrams**: [docs/INTEGRATION_DIAGRAMS.md](docs/INTEGRATION_DIAGRAMS.md) +- **Sentor Documentation**: [README.md](README.md) +- **RobotStateMachine**: https://github.com/LCAS/RobotStateMachine +- **Nav2 Documentation**: https://docs.nav2.org/ +- **ROS2 Lifecycle**: https://design.ros2.org/articles/node_lifecycle.html + +--- + +## Contact and Feedback + +For questions or suggestions about this architecture: +- Open an issue in the sentor repository +- Reference issue LCAS/sentor#[issue_number] + +--- + +*Document Version: 1.0* +*Date: 2025-11-10* +*Status: Concept Proposal* diff --git a/README.md b/README.md index 0a71072..8dd659f 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,35 @@ Continuously monitor topic messages. Send warnings and execute other processes when certain conditions on the messages are satisfied. See the [wiki](https://github.com/LCAS/sentor/wiki/sentor). +## Integration Documentation + +For integrating Sentor with RobotStateMachine and Nav2 for safe autonomous navigation: + +- **[Integration Summary](INTEGRATION_SUMMARY.md)** - Quick reference guide (7KB, 5 min read) + - Core safety requirements and system overview + - Multi-layer safety approach with sentor_guard + - Implementation checklist + +- **[Full Architecture Document](ARCHITECTURE_INTEGRATION.md)** - Complete concept architecture (45KB, 30 min read) + - Detailed system architecture and component responsibilities + - Four integration strategies with pros/cons + - State transition handling and emergency stop behavior + - Phased implementation plan with code examples + - Testing strategy and failure mode analysis + - sentor_guard package integration + +- **[sentor_guard Package Design](docs/SENTOR_GUARD_DESIGN.md)** - Reusable safety libraries (52KB, 40 min read) + - Python and C++ guard libraries (context managers, RAII) + - Topic guard node for transparent filtering + - Lifecycle guard node for automatic state management + - Complete code examples and usage patterns + - Configuration and launch file templates + +- **[Integration Diagrams](docs/INTEGRATION_DIAGRAMS.md)** - Visual reference (22KB) + - ASCII diagrams for all major flows + - Timing diagrams and state machines + - Topic and service interactions + # Sentor Monitoring Configuration This document explains how to structure your YAML config file for **Sentor**’s topic- and node-monitoring, and how the two “heartbeats” (`safety/heartbeat` and `warning/heartbeat`) are driven by your *safety-critical* and *autonomy-critical* flags. diff --git a/colcon_defaults.yaml b/colcon_defaults.yaml new file mode 100644 index 0000000..99aece1 --- /dev/null +++ b/colcon_defaults.yaml @@ -0,0 +1,4 @@ +test: + executor: sequential +build: + symlink-install: true diff --git a/docs/INTEGRATION_DIAGRAMS.md b/docs/INTEGRATION_DIAGRAMS.md new file mode 100644 index 0000000..0ab25c7 --- /dev/null +++ b/docs/INTEGRATION_DIAGRAMS.md @@ -0,0 +1,616 @@ +# Integration Architecture Diagrams + +This document contains ASCII diagrams for the Sentor-RobotStateMachine-Nav2 integration. These diagrams complement the main architecture document. + +--- + +## High-Level System Architecture + +``` +┌────────────────────────────────────────────────────────────────────────┐ +│ Autonomous Navigation System │ +└────────────────────────────────────────────────────────────────────────┘ + + ┌─────────────────────┐ + │ RobotStateMachine │ + │ │ + │ • State Management │ + │ • Mode Control │ + └──────────┬──────────┘ + │ + │ publishes + │ + ┌──────────▼──────────┐ + │ /robot_state │ + │ /autonomous_mode │ + └──────────┬──────────┘ + │ + │ subscribed by + │ + ┌──────────────────────▼───────────────────────┐ + │ │ + │ Safety Controller │ + │ │ + │ Evaluates: state AND mode AND heartbeats │ + │ │ + └──────────┬─────────────────────┬──────────────┘ + │ │ + activates │ │ subscribes + lifecycle │ │ + │ ┌──────────▼──────────┐ + │ │ Sentor │ + │ │ │ + │ │ • Topic Monitors │ + │ │ • Node Monitors │ + │ └──────────┬──────────┘ + │ │ + │ │ publishes + │ │ + │ ┌──────────▼──────────┐ + │ │ /safety/heartbeat │ + │ │ /warning/heartbeat │ + │ └─────────────────────┘ + │ + ┌──────────▼───────────┐ + │ │ + │ Nav2 Stack │ + │ │ + │ • BT Navigator │ + │ • Controller │ + │ • Planner │ + │ • Recoveries │ + │ │ + └──────────┬───────────┘ + │ + │ publishes + │ + ┌──────────▼───────────┐ + │ /cmd_vel │ + └──────────┬───────────┘ + │ + │ (optional filter) + │ + ┌──────────▼───────────┐ + │ Robot Base │ + │ (motors/wheels) │ + └──────────────────────┘ +``` + +--- + +## Information Flow - Normal Operation + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ Normal Navigation Flow │ +└──────────────────────────────────────────────────────────────────┘ + +Time: t0 (Initialization) +───────────────────────── + +RobotStateMachine: robot_state = "idle" + autonomous_mode = false + +Sentor: safety_heartbeat = false (not ready) + warning_heartbeat = false + +Safety Controller: Nav2 = INACTIVE + +Nav2: Lifecycle state = INACTIVE + + +Time: t1 (Activation Request) +────────────────────────────── + +Operator: "Enable autonomous mode" + │ + ▼ +RobotStateMachine: autonomous_mode = true + │ + ▼ + "Activate robot" + │ + ▼ + robot_state = "active" + + +Time: t2 (System Health Check) +─────────────────────────────── + +Sentor: All monitors healthy + │ + ▼ + (after safe_operation_timeout) + │ + ▼ + safety_heartbeat = true + warning_heartbeat = true + + +Time: t3 (Safety Controller Evaluation) +──────────────────────────────────────── + +Safety Controller: Checks conditions: + ✓ robot_state == "active" + ✓ autonomous_mode == true + ✓ safety_heartbeat == true + ✓ warning_heartbeat == true + │ + ▼ + SAFE_TO_NAVIGATE = TRUE + │ + ▼ + Activate Nav2 lifecycle + + +Time: t4 (Navigation Ready) +─────────────────────────── + +Nav2: Lifecycle state = ACTIVE + │ + ▼ + Ready to receive goals + │ + ▼ +Operator/Planner: Send NavigateToPose goal + │ + ▼ +Nav2: Execute navigation + │ + ▼ + Publish /cmd_vel + │ + ▼ +Robot: Moves autonomously +``` + +--- + +## Emergency Stop Sequence + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ Emergency Stop Flow │ +└──────────────────────────────────────────────────────────────────┘ + +Initial State: Robot navigating autonomously + All safety conditions satisfied + +Time: t0 (Emergency Event) +────────────────────────── + +Emergency Stop: Button pressed! + │ + ▼ +RobotStateMachine: robot_state = "emergency_stop" + (< 10ms) + + +Time: t0+50ms (Detection) +────────────────────────── + +Safety Controller: Detects state change + │ + ▼ + SAFE_TO_NAVIGATE = FALSE + │ + ├─────────────────────┐ + │ │ + ▼ ▼ + Cancel Nav2 goals Publish zero cmd_vel + │ (if filter enabled) + │ │ + ▼ ▼ + Request Nav2 deactivate Overrides Nav2 output + + +Time: t0+200ms (Nav2 Response) +─────────────────────────────── + +Nav2: Goal cancellation received + │ + ▼ + Stop behavior tree + │ + ▼ + Stop publishing cmd_vel + │ + ▼ + Report goal: ABORTED + + +Time: t0+500ms (Motion Stop) +───────────────────────────── + +Robot Base: cmd_vel = 0 + │ + ▼ + Robot stopped + │ + ▼ +System: SAFE STATE ACHIEVED + + +Steady State: robot_state = "emergency_stop" + Nav2 = INACTIVE + Robot = stationary + Waiting for manual recovery +``` + +--- + +## Sensor Failure and Recovery + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ Sensor Failure & Recovery Flow │ +└──────────────────────────────────────────────────────────────────┘ + +Phase 1: Normal Operation +───────────────────────── + +All Systems: Healthy and operating + +Lidar (/scan): Publishing at 10 Hz + │ + ▼ +Sentor: Monitor receives messages + warning_heartbeat = true + + +Phase 2: Sensor Failure +─────────────────────── + +Lidar: STOPS PUBLISHING + │ + ▼ +Sentor: Timeout exceeded (1.0s) + │ + ▼ + warning_heartbeat = false + │ + ▼ +Safety Controller: Detects heartbeat change + │ + ▼ + SAFE_TO_NAVIGATE = FALSE + │ + ▼ + Cancel navigation + Deactivate Nav2 + │ + ▼ +Robot: Stops moving + + +Phase 3: Sensor Recovery +──────────────────────── + +Lidar: Resumes publishing + │ + ▼ +Sentor: Messages received again + │ + ▼ + Wait safe_operation_timeout (10s) + │ + ▼ + All checks passed + │ + ▼ + warning_heartbeat = true + │ + ▼ +Safety Controller: All conditions satisfied + │ + ▼ + SAFE_TO_NAVIGATE = TRUE + │ + ▼ + Activate Nav2 + │ + ▼ +System: Ready for new navigation goals + (does NOT resume previous goal) +``` + +--- + +## Multi-Layer Safety Architecture + +``` +┌────────────────────────────────────────────────────────────────────────┐ +│ Defense in Depth │ +└────────────────────────────────────────────────────────────────────────┘ + + Goal Request + │ + ▼ + ╔══════════════════════════════╗ + ║ Layer 1: Safety Controller ║ + ║ ║ + ║ • Lifecycle Management ║ + ║ • Goal Cancellation ║ + ║ • Response: 100-500ms ║ + ╚══════════════════════════════╝ + │ + ┌────┴────┐ + │ PASS? │ + └────┬────┘ + │ YES + ▼ + ╔══════════════════════════════╗ + ║ Layer 2: Nav2 BT Plugin ║ + ║ ║ + ║ • Condition Checks in BT ║ + ║ • Graceful Failures ║ + ║ • Response: 50-100ms ║ + ╚══════════════════════════════╝ + │ + ┌────┴────┐ + │ PASS? │ + └────┬────┘ + │ YES + ▼ + ┌───────────────────┐ + │ Nav2 Executes │ + │ Navigation │ + └────────┬──────────┘ + │ + │ /cmd_vel + ▼ + ╔══════════════════════════════╗ + ║ Layer 3: cmd_vel Filter ║ + ║ ║ + ║ • Last-resort Gate ║ + ║ • Zeros unsafe commands ║ + ║ • Response: <50ms ║ + ╚══════════════════════════════╝ + │ + ┌────┴────┐ + │ SAFE? │ + └────┬────┘ + │ YES + ▼ + ┌───────────────────┐ + │ Robot Base │ + │ Executes Motion │ + └───────────────────┘ + + │ NO (at any layer) + ▼ + ┌─────────────┐ + │ STOP │ + │ cmd_vel=0 │ + └─────────────┘ +``` + +--- + +## Component State Machine + +``` +┌────────────────────────────────────────────────────────────────────────┐ +│ Safety Controller State Machine │ +└────────────────────────────────────────────────────────────────────────┘ + + ┌─────────────┐ + │ STARTING │ + └──────┬──────┘ + │ + ▼ + ┌─────────────┐ + ┌────┤ INACTIVE │ + │ └──────┬──────┘ + │ │ + │ │ Conditions satisfied + │ │ (state + mode + beats) + │ │ + │ ▼ + │ ┌─────────────┐ + │ │ ACTIVATING │ + │ └──────┬──────┘ + │ │ + │ │ Nav2 activation + │ │ successful + │ │ + │ ▼ + Any condition │ ┌─────────────┐ Conditions + becomes invalid │ │ ACTIVE │ still valid + │ │ │◄─────────┐ + │ │ Nav2 ready │ │ + │ │ for goals │ │ + │ └──────┬──────┘ │ + │ │ │ + │ │ Condition fails │ + │ │ │ + │ ▼ │ + │ ┌─────────────┐ │ + └───►│ DEACTIVATING│ │ + └──────┬──────┘ │ + │ │ + │ Nav2 │ + │ deactivated │ + │ │ + └─────────────────┘ + + +State Definitions: +────────────────── + +INACTIVE: Nav2 is not active, robot cannot navigate +ACTIVATING: Transitioning Nav2 to active state +ACTIVE: Nav2 is active and ready for navigation goals +DEACTIVATING: Shutting down Nav2 due to condition failure +``` + +--- + +## Topic and Service Interactions + +``` +┌────────────────────────────────────────────────────────────────────────┐ +│ ROS2 Communication Diagram │ +└────────────────────────────────────────────────────────────────────────┘ + +Topics (Publishers → Subscribers) +────────────────────────────────── + +RobotStateMachine + │ + ├─[/robot_state]────────────────────┐ + │ (std_msgs/String) │ + │ ▼ + └─[/autonomous_mode]─────────► Safety Controller + (std_msgs/Bool) + + +Sentor + │ + ├─[/safety/heartbeat]───────────────┐ + │ (std_msgs/Bool) │ + │ ▼ + └─[/warning/heartbeat]──────────► Safety Controller + (std_msgs/Bool) + + +Nav2 Controller + │ + └─[/cmd_vel]────────────────────┐ + (geometry_msgs/Twist) │ + ▼ + cmd_vel Filter (optional) + │ + ▼ + Robot Base + + +Services (Client → Server) +────────────────────────── + +Safety Controller ──[ChangeState]──► Nav2 Lifecycle Nodes + • /controller_server + • /planner_server + • /bt_navigator + +Safety Controller ──[SetOverride]──► Sentor + • /sentor/override_safety + • /sentor/override_warning + + +Actions (Client → Server) +───────────────────────── + +Safety Controller ──[CancelGoal]───► Nav2 Action Servers + • NavigateToPose + • FollowPath + +Mission Planner ────[NavigateToPose]─► Nav2 +``` + +--- + +## Timing Diagram - Emergency Stop + +``` +┌────────────────────────────────────────────────────────────────────────┐ +│ Emergency Stop Timing Diagram │ +└────────────────────────────────────────────────────────────────────────┘ + +Time (ms) Event Component +───────── ───────────────────────────────────── ───────────────── + + 0 Emergency button pressed Hardware + │ + 10 robot_state → "emergency_stop" RobotStateMachine + │ └─ Message published + │ + 50 State change detected Safety Controller + │ └─ SAFE_TO_NAVIGATE → FALSE + │ + 60 Cancel goal request sent Safety Controller + │ └─ CancelGoal action call + │ + 100 Zero cmd_vel published Safety Controller + │ └─ (if velocity filter enabled) + │ + 150 Lifecycle deactivate request sent Safety Controller + │ └─ ChangeState service call + │ + 200 Goal cancellation acknowledged Nav2 + │ └─ Goal status: ABORTED + │ + 250 Nav2 stops publishing cmd_vel Nav2 + │ + 300 Lifecycle transition complete Nav2 + │ └─ State: INACTIVE + │ + 500 Robot motion stops Robot Base + │ └─ Velocity: 0.0 m/s + │ + ▼ + System in safe state + +═══════════════════════════════════════════════════════════════════════════ + +Maximum Acceptable Latencies: +──────────────────────────── + +Detection: < 100 ms +Goal Cancellation: < 200 ms +Total Stop Time: < 500 ms +``` + +--- + +## Configuration Flow + +``` +┌────────────────────────────────────────────────────────────────────────┐ +│ System Configuration Flow │ +└────────────────────────────────────────────────────────────────────────┘ + + ┌─────────────────────┐ + │ Configuration │ + │ Files │ + └──┬──────────┬───────┘ + │ │ + ┌────────────┘ └────────────┐ + │ │ + ▼ ▼ + ┌─────────────────┐ ┌─────────────────┐ + │ sentor_config │ │ safety_config │ + │ .yaml │ │ .yaml │ + └────────┬────────┘ └────────┬────────┘ + │ │ + │ Defines: │ Defines: + │ • Topic monitors │ • Topic names + │ • Node monitors │ • Thresholds + │ • Timeouts │ • Nav2 nodes + │ • Critical flags │ • Filter enable + │ │ + ▼ ▼ + ┌─────────────────┐ ┌─────────────────┐ + │ Sentor │ │ Safety │ + │ Node │ │ Controller │ + └────────┬────────┘ └────────┬────────┘ + │ │ + │ Publishes: │ Subscribes: + │ • /safety/heartbeat │ • /robot_state + │ • /warning/heartbeat │ • /autonomous_mode + │ │ • /safety/heartbeat + │ │ • /warning/heartbeat + │ │ + └────────────┬───────────────────────┘ + │ + ▼ + ┌─────────────────┐ + │ Runtime │ + │ Parameters │ + │ via ROS2 │ + └─────────────────┘ +``` + +--- + +*These diagrams are ASCII representations for easy viewing and editing. For presentation purposes, consider creating graphical versions using tools like draw.io, PlantUML, or similar.* diff --git a/docs/NAV2_BT_INTEGRATION.md b/docs/NAV2_BT_INTEGRATION.md new file mode 100644 index 0000000..a33e354 --- /dev/null +++ b/docs/NAV2_BT_INTEGRATION.md @@ -0,0 +1,545 @@ +# Nav2 Behavior Tree Integration Guide + +This document provides a complete guide for integrating sentor_guard with Nav2 behavior trees to enable safe autonomous navigation with real-time safety monitoring. + +## Table of Contents + +1. [Overview](#overview) +2. [Architecture](#architecture) +3. [Prerequisites](#prerequisites) +4. [Installation](#installation) +5. [Basic Usage](#basic-usage) +6. [Advanced Usage](#advanced-usage) +7. [Testing](#testing) +8. [Troubleshooting](#troubleshooting) +9. [Best Practices](#best-practices) + +--- + +## Overview + +The `CheckAutonomyAllowed` behavior tree condition node enables direct integration of sentor safety monitoring within Nav2 navigation behavior trees. This provides: + +- **Real-time safety monitoring** during navigation +- **Graceful pause/resume** when safety conditions change +- **Configurable safety requirements** per use case +- **Standard integration** using BehaviorTree.CPP plugin mechanism + +### How It Works + +``` +┌─────────────────────────────────────────────────────────┐ +│ System Flow │ +├─────────────────────────────────────────────────────────┤ +│ │ +│ ┌──────────────┐ ┌──────────────┐ │ +│ │ Sentor │ │ RobotState │ │ +│ │ Monitoring │ │ Machine │ │ +│ └──────┬───────┘ └──────┬───────┘ │ +│ │ │ │ +│ │ /safety/heartbeat │ /robot_state │ +│ │ /warning/heartbeat │ /autonomous_mode │ +│ │ │ │ +│ └────────┬───────────┘ │ +│ ↓ │ +│ ┌────────────────────┐ │ +│ │ CheckAutonomy │ (BT Condition Node) │ +│ │ Allowed │ │ +│ └────────┬───────────┘ │ +│ ↓ │ +│ Returns SUCCESS/FAILURE │ +│ ↓ │ +│ ┌────────────────────┐ │ +│ │ Nav2 Behavior Tree │ │ +│ │ - ComputePath │ │ +│ │ - FollowPath │ │ +│ │ - Recovery │ │ +│ └────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────┘ +``` + +The condition node continuously evaluates safety requirements and returns: +- `SUCCESS` when all conditions are satisfied → navigation proceeds +- `FAILURE` when any condition is not met → navigation pauses + +--- + +## Architecture + +### Components + +1. **CheckAutonomyAllowed BT Node** (`sentor_guard_bt_nodes` library) + - C++ implementation using BehaviorTree.CPP + - Integrates with sentor_guard C++ library + - Configurable via BT XML + +2. **SentorGuard C++ Library** (`sentor_guard` library) + - Monitors ROS topics for safety conditions + - Provides blocking/non-blocking condition checking + - RAII pattern for safe execution + +3. **Safety Topics** (Published by external systems) + - `/robot_state` - Current operational state + - `/autonomous_mode` - Autonomous mode enabled/disabled + - `/safety/heartbeat` - Safety system health + - `/warning/heartbeat` - Warning system health + +### Integration Points + +The BT node integrates at multiple levels: + +``` +Level 1: Pre-Navigation Check + + ← Check once before starting + + + +Level 2: Continuous Monitoring + + + ← Check continuously + + + + +Level 3: Multi-Point Checking + + + + ← Multiple checks + + +``` + +--- + +## Prerequisites + +### Required + +- **ROS2** (Humble, Iron, or later) +- **Nav2** stack +- **BehaviorTree.CPP** 4.x + +### Optional but Recommended + +- **Sentor** monitoring package (for safety/warning heartbeats) +- **RobotStateMachine** (for state/mode management) + +### Installation of Dependencies + +```bash +# Install Nav2 and BehaviorTree.CPP +sudo apt install ros-$ROS_DISTRO-navigation2 ros-$ROS_DISTRO-behaviortree-cpp + +# Or build from source +cd ~/ros2_ws/src +git clone https://github.com/ros-planning/navigation2.git +git clone https://github.com/BehaviorTree/BehaviorTree.CPP.git +cd ~/ros2_ws +colcon build +``` + +--- + +## Installation + +### 1. Build sentor_guard Package + +```bash +cd ~/ros2_ws/src +# Clone sentor repository if not already present +git clone https://github.com/LCAS/sentor.git + +cd ~/ros2_ws +colcon build --packages-select sentor_guard +source install/setup.bash +``` + +### 2. Verify Installation + +```bash +# Check if BT plugin library is built +ls $(ros2 pkg prefix sentor_guard)/lib/libsentor_guard_bt_nodes.so + +# Check if plugin descriptor is installed +ls $(ros2 pkg prefix sentor_guard)/share/sentor_guard/sentor_guard_bt_nodes.xml +``` + +### 3. Configure Nav2 + +Add the plugin to your Nav2 bt_navigator parameters YAML: + +```yaml +bt_navigator: + ros__parameters: + plugin_lib_names: + - nav2_compute_path_to_pose_action_bt_node + - nav2_follow_path_action_bt_node + - nav2_back_up_action_bt_node + - nav2_spin_action_bt_node + - nav2_wait_action_bt_node + - nav2_clear_costmap_service_bt_node + - nav2_is_stuck_condition_bt_node + - nav2_goal_reached_condition_bt_node + - nav2_goal_updated_condition_bt_node + - nav2_rate_controller_bt_node + - nav2_distance_controller_bt_node + - nav2_speed_controller_bt_node + - nav2_truncate_path_action_bt_node + - nav2_recovery_node_bt_node + - nav2_pipeline_sequence_bt_node + - nav2_round_robin_node_bt_node + - nav2_transform_available_condition_bt_node + - nav2_time_expired_condition_bt_node + - nav2_distance_traveled_condition_bt_node + - sentor_guard_bt_nodes # Add this line +``` + +--- + +## Basic Usage + +### Simple Example + +Create a behavior tree XML file (e.g., `my_nav_bt.xml`): + +```xml + + + + + + + + + + + + +``` + +### Configuration Parameters + +All parameters are optional with sensible defaults: + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `state_topic` | string | `/robot_state` | Topic for robot state | +| `mode_topic` | string | `/autonomous_mode` | Topic for autonomous mode | +| `safety_heartbeat_topic` | string | `/safety/heartbeat` | Safety heartbeat topic | +| `warning_heartbeat_topic` | string | `/warning/heartbeat` | Warning heartbeat topic | +| `required_state` | string | `active` | Required state value | +| `heartbeat_timeout` | int | `1000` | Heartbeat timeout (ms) | +| `require_autonomous_mode` | bool | `true` | Check autonomous mode | +| `require_safety_heartbeat` | bool | `true` | Check safety heartbeat | +| `require_warning_heartbeat` | bool | `true` | Check warning heartbeat | + +--- + +## Advanced Usage + +### Continuous Monitoring with Pause/Resume + +```xml + + + + + + + + + + + + + + + +``` + +When conditions fail: +1. BT returns FAILURE +2. Navigation pauses (FollowPath stops) +3. On next tick, conditions checked again +4. If conditions satisfied, navigation resumes + +### Multi-Level Safety + +Different safety requirements at different points: + +```xml + + + + + + + + + + + + + + + + + + + +``` + +### Integration with Recovery Behaviors + +```xml + + + + + + + + + + + + + + + + + + + + + + + + +``` + +--- + +## Testing + +### Using Test Scripts + +sentor_guard provides test scripts to verify integration: + +#### 1. Simple Guard Demo + +```bash +# Terminal 1 - Run demo +ros2 run sentor_guard simple_guard_demo.py + +# Terminal 2 - Publish safe conditions +ros2 topic pub /robot_state std_msgs/String "data: 'active'" -1 +ros2 topic pub /autonomous_mode std_msgs/Bool "data: true" -1 +ros2 topic pub /safety/heartbeat std_msgs/Bool "data: true" -r 2 +ros2 topic pub /warning/heartbeat std_msgs/Bool "data: true" -r 2 +``` + +#### 2. BT Integration Test + +```bash +ros2 run sentor_guard test_bt_integration.py +``` + +This publishes test conditions and simulates pause/resume scenarios. + +### Testing with Nav2 + +1. Start Nav2 with your behavior tree: +```bash +ros2 launch nav2_bringup navigation_launch.py \ + params_file:=your_params.yaml +``` + +2. Publish safety conditions: +```bash +# Safe conditions +ros2 topic pub /robot_state std_msgs/String "data: 'active'" -r 1 +ros2 topic pub /autonomous_mode std_msgs/Bool "data: true" -r 1 +ros2 topic pub /safety/heartbeat std_msgs/Bool "data: true" -r 2 +ros2 topic pub /warning/heartbeat std_msgs/Bool "data: true" -r 2 +``` + +3. Send navigation goal: +```bash +ros2 action send_goal /navigate_to_pose nav2_msgs/action/NavigateToPose \ + "{pose: {header: {frame_id: 'map'}, pose: {position: {x: 1.0, y: 0.0}}}}" +``` + +4. Test pause by changing state: +```bash +ros2 topic pub /robot_state std_msgs/String "data: 'paused'" -1 +``` + +--- + +## Troubleshooting + +### Plugin Not Loading + +**Symptom**: `Failed to load behavior tree plugin sentor_guard_bt_nodes` + +**Solutions**: +1. Verify package is built: + ```bash + ros2 pkg list | grep sentor_guard + ``` + +2. Check library exists: + ```bash + ls $(ros2 pkg prefix sentor_guard)/lib/libsentor_guard_bt_nodes.so + ``` + +3. Ensure workspace is sourced: + ```bash + source ~/ros2_ws/install/setup.bash + ``` + +### Navigation Never Starts + +**Symptom**: CheckAutonomyAllowed always returns FAILURE + +**Solutions**: +1. Verify topics are published: + ```bash + ros2 topic list | grep -E "robot_state|autonomous_mode|heartbeat" + ``` + +2. Check topic values: + ```bash + ros2 topic echo /robot_state + ros2 topic echo /autonomous_mode + ``` + +3. Enable debug logging: + ```bash + ros2 run nav2_bt_navigator bt_navigator --ros-args --log-level debug + ``` + +### Heartbeat Timeout + +**Symptom**: Navigation pauses frequently due to stale heartbeats + +**Solutions**: +1. Increase timeout in BT XML: + ```xml + + ``` + +2. Check heartbeat publication rate: + ```bash + ros2 topic hz /safety/heartbeat + ``` + +3. Ensure heartbeat publishers are running + +--- + +## Best Practices + +### 1. Choose Appropriate Check Frequency + +- **Pre-navigation only**: Fast response not critical, checking before start +- **1-2 Hz**: Balance between responsiveness and overhead +- **5-10 Hz**: Critical applications requiring fast response + +### 2. Configure Timeouts Appropriately + +- **Short timeout (500ms)**: Critical safety sensors +- **Medium timeout (1000ms)**: Standard heartbeats +- **Long timeout (2000ms)**: Slower systems or degraded mode + +### 3. Use Defense in Depth + +Combine multiple safety layers: +1. **BT condition check**: Real-time monitoring in navigation +2. **Lifecycle management**: Control Nav2 node activation +3. **cmd_vel filter**: Emergency backstop + +### 4. Design for Graceful Degradation + +```xml + + + + + + + + + + + + + + + + + + +``` + +### 5. Monitor and Log + +Enable comprehensive logging: +```xml + +``` + +Check logs: +```bash +ros2 topic echo /rosout | grep CheckAutonomyAllowed +``` + +--- + +## Example: Complete Navigation System + +See the provided examples: +- `src/sentor_guard/examples/nav2_examples/navigate_with_guard.xml` +- `src/sentor_guard/examples/nav2_examples/simple_nav_with_guard.xml` +- `src/sentor_guard/examples/nav2_examples/nav2_with_guard_launch.py` + +--- + +## Related Documentation + +- [sentor_guard Package README](../src/sentor_guard/README.md) +- [Nav2 Examples README](../src/sentor_guard/examples/nav2_examples/README.md) +- [Integration Architecture](../ARCHITECTURE_INTEGRATION.md) +- [Integration Summary](../INTEGRATION_SUMMARY.md) + +--- + +## Support + +For issues or questions: +- GitHub Issues: https://github.com/LCAS/sentor/issues +- Package maintainer: LCAS team + +--- + +## License + +MIT - Part of the LCAS sentor project diff --git a/docs/SENTOR_GUARD_DESIGN.md b/docs/SENTOR_GUARD_DESIGN.md new file mode 100644 index 0000000..a635390 --- /dev/null +++ b/docs/SENTOR_GUARD_DESIGN.md @@ -0,0 +1,1599 @@ +# Sentor Guard Package Design + +## Overview + +The `sentor_guard` package provides a comprehensive set of tools and libraries to implement safe autonomous behavior by integrating sentor's state monitoring with robot control systems. It implements three complementary approaches: + +1. **Lifecycle Management** - Control ROS2 lifecycle nodes based on safety conditions +2. **Software Context Guards** - Inline code guards for Python and C++ that block execution until safety conditions are met +3. **Topic Guards** - Transparent topic forwarding that only passes messages when safety conditions are satisfied + +This design complements the Safety Controller concept described in ARCHITECTURE_INTEGRATION.md by providing reusable libraries and nodes that can be integrated into any ROS2 system. + +--- + +## Table of Contents + +1. [Package Structure](#package-structure) +2. [Core Guard Abstraction](#core-guard-abstraction) +3. [Python Guard Library](#python-guard-library) +4. [C++ Guard Library](#c-guard-library) +5. [Topic Guard Node](#topic-guard-node) +6. [Lifecycle Guard Node](#lifecycle-guard-node) +7. [Configuration](#configuration) +8. [Launch Files](#launch-files) +9. [Usage Examples](#usage-examples) +10. [Testing Strategy](#testing-strategy) + +--- + +## Package Structure + +``` +sentor_guard/ +├── CMakeLists.txt +├── package.xml +├── README.md +│ +├── include/ +│ └── sentor_guard/ +│ ├── guard.hpp # C++ guard library +│ ├── guard_exception.hpp # Exception definitions +│ └── lifecycle_guard.hpp # Lifecycle management +│ +├── src/ +│ ├── guard.cpp # C++ guard implementation +│ ├── topic_guard_node.cpp # Topic forwarding node +│ └── lifecycle_guard_node.cpp # Lifecycle management node +│ +├── sentor_guard/ +│ ├── __init__.py +│ ├── guard.py # Python guard library +│ └── topic_guard_node.py # Python topic guard (optional) +│ +├── launch/ +│ ├── guard_example.launch.py # Example launch file +│ └── topic_guards.launch.py # Topic guard configuration +│ +├── config/ +│ ├── guard_params.yaml # Example guard parameters +│ └── topic_guards.yaml # Topic guard configuration +│ +├── test/ +│ ├── test_python_guard.py +│ ├── test_cpp_guard.cpp +│ └── test_topic_guard.cpp +│ +└── examples/ + ├── python_guard_example.py + └── cpp_guard_example.cpp +``` + +--- + +## Core Guard Abstraction + +The guard abstraction monitors sentor's state and heartbeat topics to determine if autonomous operations are permitted. All implementations share common behavior: + +### State Conditions + +A guard is satisfied when **all** of the following are true: + +1. **State Match**: Current state equals the required state (default: "active") +2. **Heartbeat Fresh**: Last heartbeat timestamp is within timeout period (default: 1.0s) +3. **Heartbeat Value**: Heartbeat indicates healthy operation (typically `true`) + +### Monitoring Topics + +| Topic | Type | Purpose | +|-------|------|---------| +| `/robot_state` | std_msgs/String | Current robot operational state | +| `/autonomous_mode` | std_msgs/Bool | Whether autonomous mode is enabled | +| `/safety/heartbeat` | std_msgs/Bool | Safety-critical system health | +| `/warning/heartbeat` | std_msgs/Bool | Autonomy-critical system health | + +### Operating Modes + +Guards support three operating modes: + +1. **Blocking Wait** - Wait indefinitely until conditions are met +2. **Timed Wait** - Wait with timeout, raise exception on failure +3. **Non-blocking Check** - Immediate check without waiting + +--- + +## Python Guard Library + +### Class: `SentorGuard` + +The Python guard can be used as a context manager or called directly. + +```python +# sentor_guard/guard.py +import rclpy +from rclpy.node import Node +from rclpy.time import Time, Duration +from std_msgs.msg import String, Bool +from threading import Event, Lock +import time + +class AutonomyGuardException(Exception): + """Raised when autonomy guard conditions are not met within timeout""" + pass + +class SentorGuard: + """ + Guard that checks sentor state and heartbeat before allowing execution. + + Can be used as a context manager or called directly. + """ + + def __init__(self, node: Node, + state_topic: str = '/robot_state', + mode_topic: str = '/autonomous_mode', + safety_heartbeat_topic: str = '/safety/heartbeat', + warning_heartbeat_topic: str = '/warning/heartbeat', + heartbeat_timeout: float = 1.0, + required_state: str = 'active', + require_autonomous_mode: bool = True, + require_safety_heartbeat: bool = True, + require_warning_heartbeat: bool = True): + """ + Args: + node: ROS2 node to use for subscriptions + state_topic: Topic publishing robot state + mode_topic: Topic publishing autonomous mode flag + safety_heartbeat_topic: Topic for safety heartbeat + warning_heartbeat_topic: Topic for warning heartbeat + heartbeat_timeout: Maximum age of heartbeat in seconds + required_state: State required for autonomy (e.g., 'active') + require_autonomous_mode: Whether autonomous mode must be true + require_safety_heartbeat: Whether safety heartbeat must be true + require_warning_heartbeat: Whether warning heartbeat must be true + """ + self.node = node + self.heartbeat_timeout = Duration(seconds=heartbeat_timeout) + self.required_state = required_state + self.require_autonomous_mode = require_autonomous_mode + self.require_safety_heartbeat = require_safety_heartbeat + self.require_warning_heartbeat = require_warning_heartbeat + + self._lock = Lock() + self._current_state = None + self._autonomous_mode = None + self._safety_heartbeat = None + self._warning_heartbeat = None + self._last_safety_heartbeat_time = None + self._last_warning_heartbeat_time = None + self._condition_met = Event() + + # Subscribe to state and mode topics + self._state_sub = node.create_subscription( + String, + state_topic, + self._state_callback, + 10 + ) + + self._mode_sub = node.create_subscription( + Bool, + mode_topic, + self._mode_callback, + 10 + ) + + # Subscribe to heartbeat topics + self._safety_heartbeat_sub = node.create_subscription( + Bool, + safety_heartbeat_topic, + self._safety_heartbeat_callback, + 10 + ) + + self._warning_heartbeat_sub = node.create_subscription( + Bool, + warning_heartbeat_topic, + self._warning_heartbeat_callback, + 10 + ) + + self.node.get_logger().info( + f"SentorGuard initialized: required_state='{required_state}', " + f"heartbeat_timeout={heartbeat_timeout}s" + ) + + def _state_callback(self, msg): + with self._lock: + self._current_state = msg.data + self._check_conditions() + + def _mode_callback(self, msg): + with self._lock: + self._autonomous_mode = msg.data + self._check_conditions() + + def _safety_heartbeat_callback(self, msg): + with self._lock: + self._safety_heartbeat = msg.data + self._last_safety_heartbeat_time = self.node.get_clock().now() + self._check_conditions() + + def _warning_heartbeat_callback(self, msg): + with self._lock: + self._warning_heartbeat = msg.data + self._last_warning_heartbeat_time = self.node.get_clock().now() + self._check_conditions() + + def _check_conditions(self): + """Check if all conditions are met""" + now = self.node.get_clock().now() + + # Check state + if self._current_state != self.required_state: + self._condition_met.clear() + return + + # Check autonomous mode + if self.require_autonomous_mode and not self._autonomous_mode: + self._condition_met.clear() + return + + # Check safety heartbeat + if self.require_safety_heartbeat: + if self._safety_heartbeat is None or not self._safety_heartbeat: + self._condition_met.clear() + return + + if self._last_safety_heartbeat_time is None: + self._condition_met.clear() + return + + age = now - self._last_safety_heartbeat_time + if age > self.heartbeat_timeout: + self._condition_met.clear() + return + + # Check warning heartbeat + if self.require_warning_heartbeat: + if self._warning_heartbeat is None or not self._warning_heartbeat: + self._condition_met.clear() + return + + if self._last_warning_heartbeat_time is None: + self._condition_met.clear() + return + + age = now - self._last_warning_heartbeat_time + if age > self.heartbeat_timeout: + self._condition_met.clear() + return + + # All conditions met + self._condition_met.set() + + def is_autonomy_allowed(self) -> bool: + """Check if autonomy is currently allowed (non-blocking)""" + with self._lock: + self._check_conditions() # Recheck heartbeat age + return self._condition_met.is_set() + + def get_blocking_reason(self) -> str: + """Get human-readable reason why autonomy is blocked""" + with self._lock: + now = self.node.get_clock().now() + + if self._current_state != self.required_state: + return f"State is '{self._current_state}', required '{self.required_state}'" + + if self.require_autonomous_mode and not self._autonomous_mode: + return "Autonomous mode is disabled" + + if self.require_safety_heartbeat: + if self._safety_heartbeat is None or not self._safety_heartbeat: + return "Safety heartbeat is unhealthy or not received" + + if self._last_safety_heartbeat_time: + age = now - self._last_safety_heartbeat_time + if age > self.heartbeat_timeout: + return f"Safety heartbeat stale ({age.nanoseconds / 1e9:.2f}s old)" + + if self.require_warning_heartbeat: + if self._warning_heartbeat is None or not self._warning_heartbeat: + return "Warning heartbeat is unhealthy or not received" + + if self._last_warning_heartbeat_time: + age = now - self._last_warning_heartbeat_time + if age > self.heartbeat_timeout: + return f"Warning heartbeat stale ({age.nanoseconds / 1e9:.2f}s old)" + + return "Unknown reason" + + def wait_for_autonomy(self, timeout: float = None) -> bool: + """ + Wait until autonomy is allowed. + + Args: + timeout: Maximum time to wait in seconds. None for indefinite. + + Returns: + True if autonomy is allowed, False if timeout occurred + """ + start_time = time.time() + rate = self.node.create_rate(10) # 10 Hz check rate + + while rclpy.ok(): + if self.is_autonomy_allowed(): + return True + + if timeout is not None: + elapsed = time.time() - start_time + if elapsed >= timeout: + reason = self.get_blocking_reason() + self.node.get_logger().warn( + f"Autonomy not granted within {timeout}s: {reason}" + ) + return False + + rclpy.spin_once(self.node, timeout_sec=0.1) + + return False + + def __enter__(self): + """Context manager entry - waits indefinitely by default""" + if not self.wait_for_autonomy(): + raise AutonomyGuardException("Autonomy not allowed and node is shutting down") + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Context manager exit""" + return False + + def guarded_wait(self, timeout: float = None): + """ + Wait for autonomy with timeout, raising exception on failure. + + Args: + timeout: Maximum time to wait. None for indefinite. + + Raises: + AutonomyGuardException: If timeout occurs + """ + if not self.wait_for_autonomy(timeout): + reason = self.get_blocking_reason() + if timeout: + raise AutonomyGuardException( + f"Autonomy not granted within {timeout}s timeout: {reason}" + ) + else: + raise AutonomyGuardException( + f"Autonomy not allowed: {reason}" + ) +``` + +### Usage Examples + +```python +# Example 1: Context manager (indefinite wait) +class MyRobotNode(Node): + def __init__(self): + super().__init__('my_robot') + self.guard = SentorGuard(self, heartbeat_timeout=1.0) + + def do_autonomous_action(self): + """Execute action only when safe""" + with self.guard: + self.get_logger().info("Executing autonomous action") + # Your autonomous code here + self.publish_command() + +# Example 2: Explicit timeout with exception +class TimedActionNode(Node): + def __init__(self): + super().__init__('timed_action') + self.guard = SentorGuard(self) + + def do_timed_action(self): + try: + self.guard.guarded_wait(timeout=5.0) + self.get_logger().info("Autonomy granted, proceeding") + # Your autonomous code here + except AutonomyGuardException as e: + self.get_logger().error(f"Cannot proceed: {e}") + +# Example 3: Non-blocking check +class NonBlockingNode(Node): + def __init__(self): + super().__init__('non_blocking') + self.guard = SentorGuard(self) + + def timer_callback(self): + if self.guard.is_autonomy_allowed(): + # Proceed with autonomous action + self.execute_navigation() + else: + # Skip or handle not allowed + reason = self.guard.get_blocking_reason() + self.get_logger().debug(f"Skipping action: {reason}") +``` + +--- + +## C++ Guard Library + +### Class: `SentorGuard` + +```cpp +// sentor_guard/include/sentor_guard/guard.hpp +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace sentor_guard { + +class AutonomyGuardException : public std::runtime_error { +public: + explicit AutonomyGuardException(const std::string& message) + : std::runtime_error(message) {} +}; + +class SentorGuard { +public: + struct Options { + std::string state_topic = "/robot_state"; + std::string mode_topic = "/autonomous_mode"; + std::string safety_heartbeat_topic = "/safety/heartbeat"; + std::string warning_heartbeat_topic = "/warning/heartbeat"; + std::chrono::milliseconds heartbeat_timeout{1000}; + std::string required_state = "active"; + bool require_autonomous_mode = true; + bool require_safety_heartbeat = true; + bool require_warning_heartbeat = true; + }; + + explicit SentorGuard(rclcpp::Node::SharedPtr node, const Options& options = Options()); + + ~SentorGuard() = default; + + // Non-blocking check + bool isAutonomyAllowed(); + + // Get reason why autonomy is blocked + std::string getBlockingReason(); + + // Blocking wait with optional timeout + bool waitForAutonomy(std::chrono::milliseconds timeout = std::chrono::milliseconds::zero()); + + // Wait with exception on timeout + void guardedWait(std::chrono::milliseconds timeout = std::chrono::milliseconds::zero()); + + // RAII guard class + class Guard { + public: + explicit Guard(SentorGuard& guard, std::chrono::milliseconds timeout = std::chrono::milliseconds::zero()) + : guard_(guard) { + guard_.guardedWait(timeout); + } + ~Guard() = default; + + Guard(const Guard&) = delete; + Guard& operator=(const Guard&) = delete; + + private: + SentorGuard& guard_; + }; + +private: + void stateCallback(const std_msgs::msg::String::SharedPtr msg); + void modeCallback(const std_msgs::msg::Bool::SharedPtr msg); + void safetyHeartbeatCallback(const std_msgs::msg::Bool::SharedPtr msg); + void warningHeartbeatCallback(const std_msgs::msg::Bool::SharedPtr msg); + + void checkConditions(); + + rclcpp::Node::SharedPtr node_; + Options options_; + + rclcpp::Subscription::SharedPtr state_sub_; + rclcpp::Subscription::SharedPtr mode_sub_; + rclcpp::Subscription::SharedPtr safety_heartbeat_sub_; + rclcpp::Subscription::SharedPtr warning_heartbeat_sub_; + + std::mutex mutex_; + std::condition_variable cv_; + + std::string current_state_; + bool autonomous_mode_{false}; + bool safety_heartbeat_{false}; + bool warning_heartbeat_{false}; + rclcpp::Time last_safety_heartbeat_time_; + rclcpp::Time last_warning_heartbeat_time_; + bool condition_met_{false}; +}; + +} // namespace sentor_guard +``` + +### Implementation + +```cpp +// sentor_guard/src/guard.cpp +#include "sentor_guard/guard.hpp" + +namespace sentor_guard { + +SentorGuard::SentorGuard(rclcpp::Node::SharedPtr node, const Options& options) + : node_(node), options_(options) { + + // Create subscriptions + state_sub_ = node_->create_subscription( + options_.state_topic, 10, + std::bind(&SentorGuard::stateCallback, this, std::placeholders::_1)); + + mode_sub_ = node_->create_subscription( + options_.mode_topic, 10, + std::bind(&SentorGuard::modeCallback, this, std::placeholders::_1)); + + safety_heartbeat_sub_ = node_->create_subscription( + options_.safety_heartbeat_topic, 10, + std::bind(&SentorGuard::safetyHeartbeatCallback, this, std::placeholders::_1)); + + warning_heartbeat_sub_ = node_->create_subscription( + options_.warning_heartbeat_topic, 10, + std::bind(&SentorGuard::warningHeartbeatCallback, this, std::placeholders::_1)); + + RCLCPP_INFO(node_->get_logger(), + "SentorGuard initialized: required_state='%s', heartbeat_timeout=%ldms", + options_.required_state.c_str(), options_.heartbeat_timeout.count()); +} + +void SentorGuard::stateCallback(const std_msgs::msg::String::SharedPtr msg) { + std::lock_guard lock(mutex_); + current_state_ = msg->data; + checkConditions(); +} + +void SentorGuard::modeCallback(const std_msgs::msg::Bool::SharedPtr msg) { + std::lock_guard lock(mutex_); + autonomous_mode_ = msg->data; + checkConditions(); +} + +void SentorGuard::safetyHeartbeatCallback(const std_msgs::msg::Bool::SharedPtr msg) { + std::lock_guard lock(mutex_); + safety_heartbeat_ = msg->data; + last_safety_heartbeat_time_ = node_->get_clock()->now(); + checkConditions(); +} + +void SentorGuard::warningHeartbeatCallback(const std_msgs::msg::Bool::SharedPtr msg) { + std::lock_guard lock(mutex_); + warning_heartbeat_ = msg->data; + last_warning_heartbeat_time_ = node_->get_clock()->now(); + checkConditions(); +} + +void SentorGuard::checkConditions() { + auto now = node_->get_clock()->now(); + + // Check state + if (current_state_ != options_.required_state) { + condition_met_ = false; + return; + } + + // Check autonomous mode + if (options_.require_autonomous_mode && !autonomous_mode_) { + condition_met_ = false; + return; + } + + // Check safety heartbeat + if (options_.require_safety_heartbeat) { + if (!safety_heartbeat_) { + condition_met_ = false; + return; + } + + auto age = now - last_safety_heartbeat_time_; + if (age > rclcpp::Duration(options_.heartbeat_timeout)) { + condition_met_ = false; + return; + } + } + + // Check warning heartbeat + if (options_.require_warning_heartbeat) { + if (!warning_heartbeat_) { + condition_met_ = false; + return; + } + + auto age = now - last_warning_heartbeat_time_; + if (age > rclcpp::Duration(options_.heartbeat_timeout)) { + condition_met_ = false; + return; + } + } + + // All conditions met + bool was_met = condition_met_; + condition_met_ = true; + + if (!was_met && condition_met_) { + cv_.notify_all(); + } +} + +bool SentorGuard::isAutonomyAllowed() { + std::lock_guard lock(mutex_); + checkConditions(); // Recheck heartbeat age + return condition_met_; +} + +std::string SentorGuard::getBlockingReason() { + std::lock_guard lock(mutex_); + auto now = node_->get_clock()->now(); + + if (current_state_ != options_.required_state) { + return "State is '" + current_state_ + "', required '" + options_.required_state + "'"; + } + + if (options_.require_autonomous_mode && !autonomous_mode_) { + return "Autonomous mode is disabled"; + } + + if (options_.require_safety_heartbeat) { + if (!safety_heartbeat_) { + return "Safety heartbeat is unhealthy or not received"; + } + + auto age = now - last_safety_heartbeat_time_; + if (age > rclcpp::Duration(options_.heartbeat_timeout)) { + return "Safety heartbeat stale (" + std::to_string(age.seconds()) + "s old)"; + } + } + + if (options_.require_warning_heartbeat) { + if (!warning_heartbeat_) { + return "Warning heartbeat is unhealthy or not received"; + } + + auto age = now - last_warning_heartbeat_time_; + if (age > rclcpp::Duration(options_.heartbeat_timeout)) { + return "Warning heartbeat stale (" + std::to_string(age.seconds()) + "s old)"; + } + } + + return "Unknown reason"; +} + +bool SentorGuard::waitForAutonomy(std::chrono::milliseconds timeout) { + auto start = std::chrono::steady_clock::now(); + + while (rclcpp::ok()) { + { + std::unique_lock lock(mutex_); + checkConditions(); + + if (condition_met_) { + return true; + } + + if (timeout.count() > 0) { + auto elapsed = std::chrono::steady_clock::now() - start; + auto remaining = timeout - std::chrono::duration_cast(elapsed); + + if (remaining.count() <= 0) { + auto reason = getBlockingReason(); + RCLCPP_WARN(node_->get_logger(), + "Autonomy not granted within %ldms: %s", + timeout.count(), reason.c_str()); + return false; + } + + cv_.wait_for(lock, std::chrono::milliseconds(100)); + } else { + cv_.wait_for(lock, std::chrono::milliseconds(100)); + } + } + + // Spin once to process callbacks + rclcpp::spin_some(node_); + } + + return false; +} + +void SentorGuard::guardedWait(std::chrono::milliseconds timeout) { + if (!waitForAutonomy(timeout)) { + auto reason = getBlockingReason(); + if (timeout.count() > 0) { + throw AutonomyGuardException( + "Autonomy not granted within " + std::to_string(timeout.count()) + + "ms timeout: " + reason); + } else { + throw AutonomyGuardException("Autonomy not allowed: " + reason); + } + } +} + +} // namespace sentor_guard +``` + +### C++ Usage Examples + +```cpp +// Example 1: RAII guard (recommended) +class MyRobotNode : public rclcpp::Node { +public: + MyRobotNode() : Node("my_robot"), guard_(shared_from_this()) {} + + void doAutonomousAction() { + // Automatically waits and throws on failure + sentor_guard::SentorGuard::Guard guard(guard_); + + RCLCPP_INFO(get_logger(), "Executing autonomous action"); + // Your autonomous code here + publishCommand(); + } + +private: + sentor_guard::SentorGuard guard_; +}; + +// Example 2: Explicit timeout +class TimedActionNode : public rclcpp::Node { +public: + TimedActionNode() : Node("timed_action"), guard_(shared_from_this()) {} + + void doTimedAction() { + try { + guard_.guardedWait(std::chrono::seconds(5)); + RCLCPP_INFO(get_logger(), "Autonomy granted, proceeding"); + // Your autonomous code here + } catch (const sentor_guard::AutonomyGuardException& e) { + RCLCPP_ERROR(get_logger(), "Cannot proceed: %s", e.what()); + } + } + +private: + sentor_guard::SentorGuard guard_; +}; + +// Example 3: Non-blocking check +class NonBlockingNode : public rclcpp::Node { +public: + NonBlockingNode() : Node("non_blocking"), guard_(shared_from_this()) { + timer_ = create_wall_timer( + std::chrono::seconds(1), + std::bind(&NonBlockingNode::timerCallback, this)); + } + + void timerCallback() { + if (guard_.isAutonomyAllowed()) { + // Proceed with autonomous action + executeNavigation(); + } else { + // Skip or handle not allowed + auto reason = guard_.getBlockingReason(); + RCLCPP_DEBUG(get_logger(), "Skipping action: %s", reason.c_str()); + } + } + +private: + sentor_guard::SentorGuard guard_; + rclcpp::TimerBase::SharedPtr timer_; +}; +``` + +--- + +## Topic Guard Node + +The Topic Guard Node provides transparent topic forwarding that only passes messages when safety conditions are met. This is useful for adding safety to existing systems without modifying their code. + +### Concept + +``` +Input Topic ──> Topic Guard Node ──> Output Topic + │ + └─> Only forwards when guard conditions met + └─> Otherwise drops messages +``` + +### Node Implementation + +```cpp +// sentor_guard/src/topic_guard_node.cpp +#include +#include "sentor_guard/guard.hpp" +#include + +class TopicGuardNode : public rclcpp::Node { +public: + TopicGuardNode() : Node("topic_guard_node") { + // Declare parameters + declare_parameter("input_topic", ""); + declare_parameter("output_topic", ""); + declare_parameter("message_type", ""); + declare_parameter("required_state", "active"); + declare_parameter("heartbeat_timeout", 1.0); + + // Get parameters + std::string input_topic = get_parameter("input_topic").as_string(); + std::string output_topic = get_parameter("output_topic").as_string(); + std::string message_type = get_parameter("message_type").as_string(); + + // Initialize guard + sentor_guard::SentorGuard::Options guard_options; + guard_options.required_state = get_parameter("required_state").as_string(); + guard_options.heartbeat_timeout = std::chrono::milliseconds( + static_cast(get_parameter("heartbeat_timeout").as_double() * 1000)); + + guard_ = std::make_unique(shared_from_this(), guard_options); + + // Create generic publisher and subscriber + // Implementation depends on topic_tools or similar for type-agnostic forwarding + + RCLCPP_INFO(get_logger(), + "Topic guard initialized: %s -> %s (type: %s)", + input_topic.c_str(), output_topic.c_str(), message_type.c_str()); + } + +private: + void messageCallback(const std::shared_ptr& msg) { + if (guard_->isAutonomyAllowed()) { + // Forward message + publisher_->publish(*msg); + messages_forwarded_++; + } else { + // Drop message + messages_dropped_++; + + if (messages_dropped_ % 100 == 0) { + RCLCPP_WARN(get_logger(), + "Dropped %ld messages: %s", + messages_dropped_, guard_->getBlockingReason().c_str()); + } + } + } + + std::unique_ptr guard_; + rclcpp::GenericPublisher::SharedPtr publisher_; + rclcpp::GenericSubscription::SharedPtr subscriber_; + size_t messages_forwarded_{0}; + size_t messages_dropped_{0}; +}; +``` + +--- + +## Lifecycle Guard Node + +Manages lifecycle state of other nodes based on guard conditions. + +```cpp +// sentor_guard/src/lifecycle_guard_node.cpp +#include +#include +#include +#include "sentor_guard/guard.hpp" + +class LifecycleGuardNode : public rclcpp::Node { +public: + LifecycleGuardNode() : Node("lifecycle_guard_node") { + // Declare parameters + declare_parameter("managed_nodes", std::vector{}); + declare_parameter("check_rate", 10.0); + + auto managed_nodes = get_parameter("managed_nodes").as_string_array(); + + // Initialize guard + guard_ = std::make_unique(shared_from_this()); + + // Create service clients for each managed node + for (const auto& node_name : managed_nodes) { + auto client = create_client( + node_name + "/change_state"); + lifecycle_clients_[node_name] = client; + } + + // Create timer to check conditions periodically + double rate = get_parameter("check_rate").as_double(); + timer_ = create_wall_timer( + std::chrono::milliseconds(static_cast(1000.0 / rate)), + std::bind(&LifecycleGuardNode::checkCallback, this)); + + RCLCPP_INFO(get_logger(), "Lifecycle guard managing %zu nodes", managed_nodes.size()); + } + +private: + void checkCallback() { + bool allowed = guard_->isAutonomyAllowed(); + + if (allowed && !currently_active_) { + activateNodes(); + } else if (!allowed && currently_active_) { + deactivateNodes(); + } + } + + void activateNodes() { + RCLCPP_INFO(get_logger(), "Activating managed nodes"); + + for (auto& [node_name, client] : lifecycle_clients_) { + auto request = std::make_shared(); + request->transition.id = lifecycle_msgs::msg::Transition::TRANSITION_ACTIVATE; + + if (client->wait_for_service(std::chrono::seconds(1))) { + client->async_send_request(request); + } else { + RCLCPP_WARN(get_logger(), "Service not available for %s", node_name.c_str()); + } + } + + currently_active_ = true; + } + + void deactivateNodes() { + RCLCPP_INFO(get_logger(), "Deactivating managed nodes: %s", + guard_->getBlockingReason().c_str()); + + for (auto& [node_name, client] : lifecycle_clients_) { + auto request = std::make_shared(); + request->transition.id = lifecycle_msgs::msg::Transition::TRANSITION_DEACTIVATE; + + if (client->wait_for_service(std::chrono::seconds(1))) { + client->async_send_request(request); + } + } + + currently_active_ = false; + } + + std::unique_ptr guard_; + std::map::SharedPtr> lifecycle_clients_; + rclcpp::TimerBase::SharedPtr timer_; + bool currently_active_{false}; +}; +``` + +--- + +## Configuration + +### Guard Parameters YAML + +```yaml +# config/guard_params.yaml + +/**: + ros__parameters: + # Topics to monitor + state_topic: "/robot_state" + mode_topic: "/autonomous_mode" + safety_heartbeat_topic: "/safety/heartbeat" + warning_heartbeat_topic: "/warning/heartbeat" + + # Guard conditions + required_state: "active" + heartbeat_timeout: 1.0 # seconds + + # Which conditions to enforce + require_autonomous_mode: true + require_safety_heartbeat: true + require_warning_heartbeat: true +``` + +### Topic Guards Configuration + +```yaml +# config/topic_guards.yaml + +topic_guards: + - name: "cmd_vel_guard" + input_topic: "/nav2/cmd_vel" + output_topic: "/cmd_vel" + message_type: "geometry_msgs/msg/Twist" + required_state: "active" + heartbeat_timeout: 1.0 + + - name: "goal_guard" + input_topic: "/goal_pose_unguarded" + output_topic: "/goal_pose" + message_type: "geometry_msgs/msg/PoseStamped" + required_state: "active" + heartbeat_timeout: 1.0 +``` + +### Lifecycle Management Configuration + +```yaml +# config/lifecycle_guards.yaml + +lifecycle_guard: + ros__parameters: + managed_nodes: + - "/controller_server" + - "/planner_server" + - "/bt_navigator" + + check_rate: 10.0 # Hz + required_state: "active" + heartbeat_timeout: 1.0 +``` + +--- + +## Launch Files + +### Example Launch File + +```python +# launch/guard_example.launch.py +from launch import LaunchDescription +from launch_ros.actions import Node +from launch.actions import DeclareLaunchArgument +from launch.substitutions import LaunchConfiguration +from ament_index_python.packages import get_package_share_directory +import os + +def generate_launch_description(): + pkg_share = get_package_share_directory('sentor_guard') + + guard_params_file = os.path.join(pkg_share, 'config', 'guard_params.yaml') + + return LaunchDescription([ + DeclareLaunchArgument( + 'guard_params', + default_value=guard_params_file, + description='Path to guard parameters file' + ), + + # Launch lifecycle guard node + Node( + package='sentor_guard', + executable='lifecycle_guard_node', + name='lifecycle_guard', + output='screen', + parameters=[LaunchConfiguration('guard_params')], + ), + + # Example user node with guard + Node( + package='sentor_guard', + executable='example_guarded_node', + name='example_node', + output='screen', + parameters=[LaunchConfiguration('guard_params')], + ), + ]) +``` + +### Topic Guards Launch File + +```python +# launch/topic_guards.launch.py +from launch import LaunchDescription +from launch_ros.actions import Node +from launch.actions import DeclareLaunchArgument +from launch.substitutions import LaunchConfiguration +import yaml +from ament_index_python.packages import get_package_share_directory +import os + +def generate_launch_description(): + pkg_share = get_package_share_directory('sentor_guard') + topic_guards_file = os.path.join(pkg_share, 'config', 'topic_guards.yaml') + + # Load topic guards configuration + with open(topic_guards_file, 'r') as f: + config = yaml.safe_load(f) + + nodes = [] + + # Create a topic guard node for each configured guard + for guard_config in config['topic_guards']: + node = Node( + package='sentor_guard', + executable='topic_guard_node', + name=guard_config['name'], + output='screen', + parameters=[{ + 'input_topic': guard_config['input_topic'], + 'output_topic': guard_config['output_topic'], + 'message_type': guard_config['message_type'], + 'required_state': guard_config.get('required_state', 'active'), + 'heartbeat_timeout': guard_config.get('heartbeat_timeout', 1.0), + }] + ) + nodes.append(node) + + return LaunchDescription(nodes) +``` + +--- + +## Usage Examples + +### Python Application Example + +```python +# examples/python_guard_example.py +import rclpy +from rclpy.node import Node +from geometry_msgs.msg import Twist +from sentor_guard.guard import SentorGuard, AutonomyGuardException + +class GuardedNavigationNode(Node): + def __init__(self): + super().__init__('guarded_navigation') + + # Initialize guard + self.guard = SentorGuard( + self, + required_state='active', + heartbeat_timeout=1.0, + require_autonomous_mode=True, + require_safety_heartbeat=True, + require_warning_heartbeat=True + ) + + # Create publisher + self.cmd_vel_pub = self.create_publisher(Twist, '/cmd_vel', 10) + + # Create timer for periodic action + self.timer = self.create_timer(0.1, self.timer_callback) + + self.get_logger().info("Guarded navigation node started") + + def timer_callback(self): + """Periodic callback - only publishes if guard allows""" + if self.guard.is_autonomy_allowed(): + # Autonomy allowed, publish command + msg = Twist() + msg.linear.x = 0.5 + self.cmd_vel_pub.publish(msg) + else: + # Not allowed, log reason + reason = self.guard.get_blocking_reason() + self.get_logger().debug(f"Navigation blocked: {reason}") + + def execute_mission(self): + """Execute a mission with timeout""" + try: + # Wait up to 10 seconds for autonomy + self.guard.guarded_wait(timeout=10.0) + + # Proceed with mission + self.get_logger().info("Starting mission") + self.run_mission_steps() + + except AutonomyGuardException as e: + self.get_logger().error(f"Mission aborted: {e}") + + def critical_action(self): + """Critical action that must wait indefinitely""" + with self.guard: + self.get_logger().info("Executing critical action") + # This code only runs when guard conditions are met + self.perform_action() + +def main(): + rclpy.init() + node = GuardedNavigationNode() + rclpy.spin(node) + rclpy.shutdown() + +if __name__ == '__main__': + main() +``` + +### C++ Application Example + +```cpp +// examples/cpp_guard_example.cpp +#include +#include +#include "sentor_guard/guard.hpp" + +class GuardedNavigationNode : public rclcpp::Node { +public: + GuardedNavigationNode() : Node("guarded_navigation") { + // Initialize guard + sentor_guard::SentorGuard::Options options; + options.required_state = "active"; + options.heartbeat_timeout = std::chrono::seconds(1); + options.require_autonomous_mode = true; + options.require_safety_heartbeat = true; + options.require_warning_heartbeat = true; + + guard_ = std::make_unique(shared_from_this(), options); + + // Create publisher + cmd_vel_pub_ = create_publisher("/cmd_vel", 10); + + // Create timer for periodic action + timer_ = create_wall_timer( + std::chrono::milliseconds(100), + std::bind(&GuardedNavigationNode::timerCallback, this)); + + RCLCPP_INFO(get_logger(), "Guarded navigation node started"); + } + + void timerCallback() { + if (guard_->isAutonomyAllowed()) { + // Autonomy allowed, publish command + auto msg = geometry_msgs::msg::Twist(); + msg.linear.x = 0.5; + cmd_vel_pub_->publish(msg); + } else { + // Not allowed, log reason + auto reason = guard_->getBlockingReason(); + RCLCPP_DEBUG(get_logger(), "Navigation blocked: %s", reason.c_str()); + } + } + + void executeMission() { + try { + // Wait up to 10 seconds for autonomy + guard_->guardedWait(std::chrono::seconds(10)); + + // Proceed with mission + RCLCPP_INFO(get_logger(), "Starting mission"); + runMissionSteps(); + + } catch (const sentor_guard::AutonomyGuardException& e) { + RCLCPP_ERROR(get_logger(), "Mission aborted: %s", e.what()); + } + } + + void criticalAction() { + // RAII guard - automatically waits + sentor_guard::SentorGuard::Guard guard(*guard_); + + RCLCPP_INFO(get_logger(), "Executing critical action"); + // This code only runs when guard conditions are met + performAction(); + } + +private: + std::unique_ptr guard_; + rclcpp::Publisher::SharedPtr cmd_vel_pub_; + rclcpp::TimerBase::SharedPtr timer_; +}; + +int main(int argc, char** argv) { + rclcpp::init(argc, argv); + auto node = std::make_shared(); + rclcpp::spin(node); + rclcpp::shutdown(); + return 0; +} +``` + +--- + +## Testing Strategy + +### Unit Tests + +```python +# test/test_python_guard.py +import unittest +import rclpy +from rclpy.node import Node +from std_msgs.msg import String, Bool +from sentor_guard.guard import SentorGuard, AutonomyGuardException +import time + +class TestSentorGuard(unittest.TestCase): + @classmethod + def setUpClass(cls): + rclpy.init() + + @classmethod + def tearDownClass(cls): + rclcpp.shutdown() + + def setUp(self): + self.node = Node('test_guard_node') + self.guard = SentorGuard(self.node, heartbeat_timeout=0.5) + + # Create test publishers + self.state_pub = self.node.create_publisher(String, '/robot_state', 10) + self.mode_pub = self.node.create_publisher(Bool, '/autonomous_mode', 10) + self.safety_pub = self.node.create_publisher(Bool, '/safety/heartbeat', 10) + self.warning_pub = self.node.create_publisher(Bool, '/warning/heartbeat', 10) + + time.sleep(0.1) # Allow subscriptions to connect + + def test_all_conditions_met(self): + """Test that guard allows when all conditions are met""" + # Publish required state + msg = String() + msg.data = 'active' + self.state_pub.publish(msg) + + # Publish autonomous mode + mode_msg = Bool() + mode_msg.data = True + self.mode_pub.publish(mode_msg) + + # Publish heartbeats + hb_msg = Bool() + hb_msg.data = True + self.safety_pub.publish(hb_msg) + self.warning_pub.publish(hb_msg) + + # Spin to process messages + rclpy.spin_once(self.node, timeout_sec=0.1) + + # Check that autonomy is allowed + self.assertTrue(self.guard.is_autonomy_allowed()) + + def test_wrong_state_blocks(self): + """Test that wrong state blocks autonomy""" + msg = String() + msg.data = 'paused' + self.state_pub.publish(msg) + + rclpy.spin_once(self.node, timeout_sec=0.1) + + self.assertFalse(self.guard.is_autonomy_allowed()) + self.assertIn("State is", self.guard.get_blocking_reason()) + + def test_heartbeat_timeout(self): + """Test that stale heartbeat blocks autonomy""" + # Set up valid state + msg = String() + msg.data = 'active' + self.state_pub.publish(msg) + + mode_msg = Bool() + mode_msg.data = True + self.mode_pub.publish(mode_msg) + + hb_msg = Bool() + hb_msg.data = True + self.safety_pub.publish(hb_msg) + self.warning_pub.publish(hb_msg) + + rclpy.spin_once(self.node, timeout_sec=0.1) + self.assertTrue(self.guard.is_autonomy_allowed()) + + # Wait for heartbeat timeout + time.sleep(0.6) + rclpy.spin_once(self.node, timeout_sec=0.1) + + self.assertFalse(self.guard.is_autonomy_allowed()) + self.assertIn("stale", self.guard.get_blocking_reason()) + + def test_timeout_exception(self): + """Test that timeout raises exception""" + with self.assertRaises(AutonomyGuardException): + self.guard.guarded_wait(timeout=0.1) +``` + +### Integration Tests + +```cpp +// test/test_cpp_guard.cpp +#include +#include +#include "sentor_guard/guard.hpp" +#include +#include + +class TestSentorGuard : public ::testing::Test { +protected: + void SetUp() override { + rclcpp::init(0, nullptr); + node_ = std::make_shared("test_guard_node"); + + sentor_guard::SentorGuard::Options options; + options.heartbeat_timeout = std::chrono::milliseconds(500); + guard_ = std::make_unique(node_, options); + + // Create test publishers + state_pub_ = node_->create_publisher("/robot_state", 10); + mode_pub_ = node_->create_publisher("/autonomous_mode", 10); + safety_pub_ = node_->create_publisher("/safety/heartbeat", 10); + warning_pub_ = node_->create_publisher("/warning/heartbeat", 10); + + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + + void TearDown() override { + rclcpp::shutdown(); + } + + rclcpp::Node::SharedPtr node_; + std::unique_ptr guard_; + rclcpp::Publisher::SharedPtr state_pub_; + rclcpp::Publisher::SharedPtr mode_pub_; + rclcpp::Publisher::SharedPtr safety_pub_; + rclcpp::Publisher::SharedPtr warning_pub_; +}; + +TEST_F(TestSentorGuard, AllConditionsMet) { + // Publish required state + auto state_msg = std_msgs::msg::String(); + state_msg.data = "active"; + state_pub_->publish(state_msg); + + // Publish autonomous mode + auto mode_msg = std_msgs::msg::Bool(); + mode_msg.data = true; + mode_pub_->publish(mode_msg); + + // Publish heartbeats + auto hb_msg = std_msgs::msg::Bool(); + hb_msg.data = true; + safety_pub_->publish(hb_msg); + warning_pub_->publish(hb_msg); + + rclcpp::spin_some(node_); + + EXPECT_TRUE(guard_->isAutonomyAllowed()); +} + +TEST_F(TestSentorGuard, WrongStateBlocks) { + auto state_msg = std_msgs::msg::String(); + state_msg.data = "paused"; + state_pub_->publish(state_msg); + + rclcpp::spin_some(node_); + + EXPECT_FALSE(guard_->isAutonomyAllowed()); + EXPECT_THAT(guard_->getBlockingReason(), ::testing::HasSubstr("State is")); +} + +TEST_F(TestSentorGuard, HeartbeatTimeout) { + // Set up valid state + auto state_msg = std_msgs::msg::String(); + state_msg.data = "active"; + state_pub_->publish(state_msg); + + auto mode_msg = std_msgs::msg::Bool(); + mode_msg.data = true; + mode_pub_->publish(mode_msg); + + auto hb_msg = std_msgs::msg::Bool(); + hb_msg.data = true; + safety_pub_->publish(hb_msg); + warning_pub_->publish(hb_msg); + + rclcpp::spin_some(node_); + EXPECT_TRUE(guard_->isAutonomyAllowed()); + + // Wait for heartbeat timeout + std::this_thread::sleep_for(std::chrono::milliseconds(600)); + rclcpp::spin_some(node_); + + EXPECT_FALSE(guard_->isAutonomyAllowed()); + EXPECT_THAT(guard_->getBlockingReason(), ::testing::HasSubstr("stale")); +} +``` + +--- + +## Integration with Existing Architecture + +The `sentor_guard` package complements the Safety Controller architecture described in ARCHITECTURE_INTEGRATION.md: + +### Relationship Diagram + +``` +┌────────────────────────────────────────────────────────────┐ +│ Complete System │ +└────────────────────────────────────────────────────────────┘ + +RobotStateMachine ──┐ + /robot_state │ + /autonomous_mode │ + ├──> Sentor ──> /safety/heartbeat + │ /warning/heartbeat + │ │ + │ ↓ + │ ┌───────────────────┐ + │ │ sentor_guard │ + │ │ (This Package) │ + │ │ │ + │ │ • Python Guard │ + │ │ • C++ Guard │ + │ │ • Topic Guards │ + │ │ • Lifecycle Mgr │ + │ └─────────┬─────────┘ + │ │ + ↓ ↓ + ┌──────────────────┐ ┌──────────────────┐ + │ Safety Controller│ │ User Code with │ + │ (Central) │ │ Guard Libraries │ + └────────┬─────────┘ └────────┬─────────┘ + │ │ + ↓ ↓ + Nav2 ────────────> Robot Actions +``` + +### Usage Scenarios + +1. **Centralized Safety Controller** (Strategy 1 from ARCHITECTURE_INTEGRATION.md) + - Use Safety Controller node with sentor_guard libraries + - Safety Controller uses C++ Guard for condition checking + - Manages Nav2 lifecycle based on guard state + +2. **Distributed Application Guards** (Complementary approach) + - Individual nodes use Python/C++ Guard libraries + - Each node independently checks conditions before actions + - Defense in depth with local safety checks + +3. **Topic-Level Safety** (Strategy 4 from ARCHITECTURE_INTEGRATION.md) + - Use Topic Guard nodes to filter cmd_vel + - Transparent integration with existing systems + - No code changes required + +4. **Hybrid Approach** (Recommended) + - Safety Controller for lifecycle management + - Topic Guards for cmd_vel filtering + - Application code uses Guard libraries for critical sections + +--- + +## Summary + +The `sentor_guard` package provides a complete toolkit for implementing safe autonomous behavior: + +- **Reusable Libraries**: Python and C++ guard classes for inline safety checks +- **Infrastructure Nodes**: Topic guards and lifecycle managers for system-level safety +- **Configuration Driven**: YAML-based configuration for easy deployment +- **Multiple Patterns**: Context managers, RAII, timeouts, and non-blocking checks +- **Well Tested**: Comprehensive unit and integration tests +- **Documented**: Examples and usage patterns for common scenarios + +This package complements the Safety Controller architecture by providing the building blocks needed to implement safe autonomy at all levels of the system, from low-level topic filtering to high-level application logic. + +--- + +## Next Steps for Implementation + +1. **Package Setup** + - Create package structure with CMakeLists.txt and package.xml + - Set up dependencies (rclcpp, rclpy, std_msgs, lifecycle_msgs) + +2. **Core Library Development** + - Implement Python SentorGuard class + - Implement C++ SentorGuard class + - Add comprehensive error handling and logging + +3. **Node Development** + - Implement Topic Guard node + - Implement Lifecycle Guard node + - Create example user nodes + +4. **Configuration and Launch** + - Create parameter YAML files + - Implement launch files + - Document configuration options + +5. **Testing** + - Write unit tests for guard libraries + - Create integration tests with mock sentor + - Perform system tests with actual sentor + +6. **Documentation** + - API documentation + - Tutorial documentation + - Migration guide for existing systems + +7. **Integration** + - Test with Nav2 stack + - Validate with RobotStateMachine + - Benchmark performance and latency diff --git a/src/RobotStateMachine b/src/RobotStateMachine deleted file mode 160000 index 911d58d..0000000 --- a/src/RobotStateMachine +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 911d58da282b78a33795647eb1d7f55bb1d5df13 diff --git a/src/sentor/config/test_monitor_config.yaml b/src/sentor/config/test_monitor_config.yaml index f782016..ee7d8b6 100644 --- a/src/sentor/config/test_monitor_config.yaml +++ b/src/sentor/config/test_monitor_config.yaml @@ -75,8 +75,8 @@ monitors: #[] # timeout: 10 # default_notifications: False - - name: "/chatter/safety_critical" - message_type: "std_msgs/msg/String" + - name: "/hunter_status" + message_type: "hunter_msgs/msg/HunterStatus" rate: 10 N: 5 qos: @@ -90,43 +90,13 @@ monitors: #[] safety_critical: true # ← leave false if this “published” check shouldn’t gate safety‐beat autonomy_critical: true # ← set true if you want mere “published” to gate warning‐beat signal_lambdas: - - expression: "lambda data: data.data == 'hello'" + - expression: "lambda data: data.battery_voltage >= 20.0" timeout: 2.0 - safety_critical: false # affects safety beat - autonomy_critical: false # doesn’t affect warning beat - process_indices: [2] # indexes into `execute` list (optional) - repeat_exec: false # run every time satisfied? - tags: ["resolution"] + safety_critical: false + autonomy_critical: false + process_indices: [2] # Optional + repeat_exec: false # Only execute once when condition is met + tags: ["battery_check"] execute: [] timeout: 10 default_notifications: False - -# ros2 topic pub --rate 10 /chatter/safety_critical std_msgs/msg/String "{data: 'hello'}" - - - name: "/chatter_autonomy_critical" - message_type: "std_msgs/msg/String" - rate: 10 - N: 5 - qos: - reliability: "reliable" - durability: "volatile" - depth: 5 - - signal_when: - condition: "published" - timeout: 30 - safety_critical: false # ← leave false if this “published” check shouldn’t gate safety‐beat - autonomy_critical: true # ← set true if you want mere “published” to gate warning‐beat - signal_lambdas: - - expression: "lambda data: data.data == 'hello'" - timeout: 2.0 - safety_critical: false # affects safety beat - autonomy_critical: false # doesn’t affect warning beat - process_indices: [2] # indexes into `execute` list (optional) - repeat_exec: false # run every time satisfied? - tags: ["resolution"] - execute: [] - timeout: 10 - default_notifications: False - -# ros2 topic pub --rate 10 /chatter_autonomy_critical std_msgs/msg/String "{data: 'hello'}" diff --git a/src/sentor_guard/CMakeLists.txt b/src/sentor_guard/CMakeLists.txt new file mode 100644 index 0000000..92ac05a --- /dev/null +++ b/src/sentor_guard/CMakeLists.txt @@ -0,0 +1,177 @@ +cmake_minimum_required(VERSION 3.8) +project(sentor_guard) + +if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + add_compile_options(-Wall -Wextra -Wpedantic) +endif() + +# Find dependencies +find_package(ament_cmake REQUIRED) +find_package(ament_cmake_python REQUIRED) +find_package(rclcpp REQUIRED) +find_package(rclpy REQUIRED) +find_package(std_msgs REQUIRED) +find_package(lifecycle_msgs REQUIRED) +find_package(builtin_interfaces REQUIRED) +find_package(sentor_msgs REQUIRED) + +# Optional: BehaviorTree.CPP for Nav2 integration +find_package(behaviortree_cpp QUIET) +if(behaviortree_cpp_FOUND) + message(STATUS "BehaviorTree.CPP found - building BT plugin") + set(BUILD_BT_PLUGIN TRUE) +else() + message(STATUS "BehaviorTree.CPP not found - skipping BT plugin") + set(BUILD_BT_PLUGIN FALSE) +endif() + +# Include directories +include_directories(include) + +# C++ Guard Library +add_library(${PROJECT_NAME}_lib SHARED + src/guard.cpp +) +ament_target_dependencies(${PROJECT_NAME}_lib + rclcpp + std_msgs + sentor_msgs +) + +# C++ Topic Guard Node +add_executable(topic_guard_node + src/topic_guard_node.cpp +) +ament_target_dependencies(topic_guard_node + rclcpp + std_msgs + sentor_msgs +) +target_link_libraries(topic_guard_node ${PROJECT_NAME}_lib) + +# C++ Lifecycle Guard Node +add_executable(lifecycle_guard_node + src/lifecycle_guard_node.cpp +) +ament_target_dependencies(lifecycle_guard_node + rclcpp + lifecycle_msgs + std_msgs + sentor_msgs +) +target_link_libraries(lifecycle_guard_node ${PROJECT_NAME}_lib) + +# BehaviorTree.CPP Plugin for Nav2 integration +if(BUILD_BT_PLUGIN) + add_library(sentor_guard_bt_nodes SHARED + src/bt_condition_node.cpp + ) + ament_target_dependencies(sentor_guard_bt_nodes + rclcpp + std_msgs + behaviortree_cpp + sentor_msgs + ) + target_link_libraries(sentor_guard_bt_nodes ${PROJECT_NAME}_lib) + + # Install BT plugin library + install(TARGETS sentor_guard_bt_nodes + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin + ) + + # Install BT plugin description + install(FILES sentor_guard_bt_nodes.xml + DESTINATION share/${PROJECT_NAME} + ) +endif() + +# Install C++ libraries and executables +install(TARGETS + ${PROJECT_NAME}_lib + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin +) + +install(TARGETS + topic_guard_node + lifecycle_guard_node + DESTINATION lib/${PROJECT_NAME} +) + +# Install Python modules +ament_python_install_package(${PROJECT_NAME}) + + +install(DIRECTORY include/ + DESTINATION include +) + +# Install Python modules (handled by setup.py via ament_cmake_python) +# Note: ament_python_install_package is not needed when setup.py exists + +# Install Python executables +install(PROGRAMS + examples/python_guard_example.py + examples/nav2_examples/test_bt_integration.py + examples/nav2_examples/simple_guard_demo.py + DESTINATION lib/${PROJECT_NAME} +) + +# Install launch files +install(DIRECTORY + launch + DESTINATION share/${PROJECT_NAME} +) + +# Install config files +install(DIRECTORY + config + DESTINATION share/${PROJECT_NAME} +) + +# Install examples +install(DIRECTORY + examples + DESTINATION share/${PROJECT_NAME} + PATTERN "*.pyc" EXCLUDE +) + +if(BUILD_TESTING) + find_package(ament_lint_auto REQUIRED) + #ament_lint_auto_find_test_dependencies() + + find_package(ament_cmake_pytest REQUIRED) + ament_add_pytest_test(test_python_guard test/test_python_guard.py) + + # C++ tests for BT condition node (only if BT.CPP is available) + if(BUILD_BT_PLUGIN) + find_package(ament_cmake_gtest REQUIRED) + ament_add_gtest(test_bt_condition_node test/test_bt_condition_node.cpp) + target_link_libraries(test_bt_condition_node sentor_guard_bt_nodes) + ament_target_dependencies(test_bt_condition_node + rclcpp + std_msgs + behaviortree_cpp + ) + endif() +endif() + +ament_export_include_directories(include) +ament_export_libraries(${PROJECT_NAME}_lib) +ament_export_dependencies( + rclcpp + std_msgs + sentor_msgs + lifecycle_msgs + builtin_interfaces +) + +if(BUILD_BT_PLUGIN) + ament_export_libraries(sentor_guard_bt_nodes) + ament_export_dependencies(behaviortree_cpp) +endif() + +ament_package() diff --git a/src/sentor_guard/README.md b/src/sentor_guard/README.md new file mode 100644 index 0000000..99b9ae1 --- /dev/null +++ b/src/sentor_guard/README.md @@ -0,0 +1,292 @@ +# sentor_guard + +Safety guard libraries and nodes for RobotStateMachine-based autonomous systems. + +## Overview + +The `sentor_guard` package provides reusable components for implementing safe autonomous behavior by monitoring the robot state and autonomous mode from RobotStateMachine. It offers three complementary approaches: + +1. **Software Context Guards** - Python and C++ libraries for inline safety checks +2. **Topic Guards** - Transparent topic forwarding with safety gating +3. **Lifecycle Guards** - Automatic lifecycle management of nodes based on safety conditions + +The guard monitors two topics published by RobotStateMachine: +- `/robot_state` (String): One of 'start-up', 'disabled', 'enabled', 'active' +- `/autonomous_mode` (Bool): TRUE in autonomous, FALSE in manual + +The guard ensures these messages are recent (within configurable timeout). + +## Features + +- **Python Library**: Context manager pattern (`with` statement) that blocks execution until conditions met +- **C++ Library**: RAII pattern for automatic safety checking +- **Topic Guard Node**: Filters messages (e.g., cmd_vel) based on guard conditions +- **Lifecycle Guard Node**: Manages lifecycle state of other nodes +- **ROS Parameter Configuration**: Easy configuration via YAML files +- **Multiple Usage Patterns**: Blocking wait, timeout-based wait, non-blocking checks +- **Message Freshness Checking**: Ensures state/mode updates are recent + +## Installation + +This package is part of the sentor repository. Build with: + +```bash +cd ~/ros2_ws +colcon build --packages-select sentor_guard +source install/setup.bash +``` + +## Quick Start + +### Python Guard + +```python +from sentor_guard import SentorGuard + +class MyNode(Node): + def __init__(self): + super().__init__('my_node') + self.guard = SentorGuard(self, required_state='active') + + def do_autonomous_action(self): + # Only executes when safe + with self.guard: + self.execute_navigation() +``` + +### C++ Guard + +```cpp +#include "sentor_guard/guard.hpp" + +class MyNode : public rclcpp::Node { + sentor_guard::SentorGuard guard_; +public: + MyNode() : Node("my_node"), guard_(shared_from_this()) {} + + void doAutonomousAction() { + // RAII guard - automatically waits + sentor_guard::SentorGuard::Guard guard(guard_); + executeNavigation(); + } +}; +``` + +### Launch Example + +```bash +ros2 launch sentor_guard guard_example.launch.py +``` + +## Configuration + +See `config/guard_params.yaml` for configuration options: + +```yaml +/**: + ros__parameters: + state_topic: "/robot_state" + mode_topic: "/autonomous_mode" + required_state: "active" + update_timeout: 1.0 # Maximum age of messages in seconds + require_autonomous_mode: true +``` + +## Safety Conditions + +A guard is satisfied when **all** of the following are true: + +1. **State Received**: Robot state message has been received +2. **State Fresh**: State message is recent (within update_timeout) +3. **State Match**: Current state equals required state (default: "active") +4. **Mode Received**: Autonomous mode message has been received +5. **Mode Fresh**: Mode message is recent (within update_timeout) +6. **Mode Enabled**: Autonomous mode is true (if require_autonomous_mode is true) + +## Usage Patterns + +### Pattern 1: Context Manager (Python) + +```python +with self.guard: + # Code here only runs when safe + execute_autonomous_action() +``` + +### Pattern 2: RAII Guard (C++) + +```cpp +{ + SentorGuard::Guard guard(my_guard); + // Code here only runs when safe + executeAutonomousAction(); +} +``` + +### Pattern 3: Timeout Wait + +```python +try: + self.guard.guarded_wait(timeout=5.0) + execute_action() +except AutonomyGuardException as e: + handle_timeout(e) +``` + +### Pattern 4: Non-blocking Check + +```python +if self.guard.is_autonomy_allowed(): + execute_action() +else: + reason = self.guard.get_blocking_reason() + log_warning(reason) +``` + +### Pattern 5: Decorator (Python) + +The `@sentor_guarded` decorator provides a clean way to protect functions/methods: + +```python +from sentor_guard import sentor_guarded + +class MyNode(Node): + def __init__(self): + super().__init__('my_node') + self.guard = SentorGuard(self) + + # Decorator without timeout (waits indefinitely) + @sentor_guarded() + def autonomous_action(self): + execute_navigation() + + # Decorator with timeout + @sentor_guarded(timeout=5.0) + def timed_action(self): + execute_task() + +# Can also use with explicit guard for standalone functions +my_guard = SentorGuard(node) + +@sentor_guarded(guard=my_guard, timeout=10.0) +def standalone_function(): + execute_something() +``` + +The decorator: +- Automatically checks guard conditions before executing the function +- Raises `AutonomyGuardException` if conditions not met within timeout +- Works with class methods (uses `self.guard`) or standalone functions (needs explicit guard) +- Supports optional timeout parameter + +## Nodes + +### topic_guard_node + +Forwards messages only when guard conditions are met. + +**Parameters:** +- `input_topic`: Source topic to monitor +- `output_topic`: Destination topic for forwarded messages +- `message_type`: ROS message type (e.g., "geometry_msgs/msg/Twist") +- `required_state`: Required robot state (default: "active") +- `heartbeat_timeout`: Maximum heartbeat age in seconds (default: 1.0) + +**Example:** + +```bash +ros2 run sentor_guard topic_guard_node --ros-args \ + -p input_topic:=/nav2/cmd_vel \ + -p output_topic:=/cmd_vel \ + -p message_type:=geometry_msgs/msg/Twist +``` + +### lifecycle_guard_node + +Manages lifecycle state of other nodes based on guard conditions. + +**Parameters:** +- `managed_nodes`: List of node names to manage +- `check_rate`: How often to check conditions (Hz, default: 10.0) + +**Example:** + +```bash +ros2 run sentor_guard lifecycle_guard_node --ros-args \ + -p managed_nodes:="['/controller_server', '/planner_server']" \ + -p check_rate:=10.0 +``` + +## Nav2 Integration + +### BehaviorTree Condition Node + +The `CheckAutonomyAllowed` BT condition node enables direct integration with Nav2 behavior trees for safe autonomous navigation. + +**Features:** +- Continuous safety monitoring during navigation +- Graceful pause/resume when conditions change +- Configurable via BT XML +- Uses standard BehaviorTree.CPP plugin mechanism + +**Installation:** + +Install BehaviorTree.CPP if not already present: +```bash +sudo apt install ros-$ROS_DISTRO-behaviortree-cpp +``` + +Build with BT support: +```bash +colcon build --packages-select sentor_guard +``` + +**Usage in Nav2:** + +1. Add to bt_navigator plugin list in your Nav2 params YAML: +```yaml +bt_navigator: + ros__parameters: + plugin_lib_names: + - nav2_compute_path_to_pose_action_bt_node + # ... other plugins ... + - sentor_guard_bt_nodes # Add this +``` + +2. Use in behavior tree XML: +```xml + + + + +``` + +For complete examples and integration patterns, see: +- [examples/nav2_examples/](examples/nav2_examples/) - Example BT XMLs and launch files +- [examples/nav2_examples/README.md](examples/nav2_examples/README.md) - Detailed integration guide + +## Testing + +Run tests: + +```bash +colcon test --packages-select sentor_guard +colcon test-result --verbose +``` + +## Documentation + +For complete architecture documentation, see: +- [SENTOR_GUARD_DESIGN.md](../../docs/SENTOR_GUARD_DESIGN.md) - Complete design specification +- [ARCHITECTURE_INTEGRATION.md](../../ARCHITECTURE_INTEGRATION.md) - System integration architecture + +## License + +MIT + +## Authors + +Part of the LCAS sentor project. diff --git a/src/sentor_guard/config/guard_params.yaml b/src/sentor_guard/config/guard_params.yaml new file mode 100644 index 0000000..62def2e --- /dev/null +++ b/src/sentor_guard/config/guard_params.yaml @@ -0,0 +1,12 @@ +/**: + ros__parameters: + # Topics to monitor from RobotStateMachine + state_topic: "/robot_state" + mode_topic: "/autonomous_mode" + + # Guard conditions + required_state: "active" # One of: 'start-up', 'disabled', 'enabled', 'active' + update_timeout: 1.0 # Maximum age of messages in seconds + + # Which conditions to enforce + require_autonomous_mode: true diff --git a/src/sentor_guard/examples/nav2_examples/README.md b/src/sentor_guard/examples/nav2_examples/README.md new file mode 100644 index 0000000..e52dd39 --- /dev/null +++ b/src/sentor_guard/examples/nav2_examples/README.md @@ -0,0 +1,337 @@ +# Nav2 Integration with sentor_guard + +This directory contains examples for integrating sentor_guard with Nav2 behavior trees to ensure safe autonomous navigation. + +## Overview + +The `CheckAutonomyAllowed` behavior tree condition node continuously monitors sentor guard conditions and controls navigation flow based on safety state. This enables: + +- **Graceful pause/resume**: Navigation automatically pauses when conditions are not met and resumes when they're satisfied +- **Real-time safety**: Continuous monitoring during navigation, not just at start +- **Integration with Nav2**: Uses standard BehaviorTree.CPP plugin mechanism +- **Configurable**: All safety parameters can be adjusted per use case + +## Files + +### Behavior Tree Examples +- `navigate_with_guard.xml` - Full Nav2 behavior tree with continuous safety monitoring and recovery +- `simple_nav_with_guard.xml` - Minimal example showing basic integration + +### Launch Files +- `nav2_with_guard_launch.py` - Example launch file for Nav2 with sentor_guard + +### Test and Demo Scripts +- `test_bt_integration.py` - Test script that publishes sample conditions to demonstrate guard behavior +- `simple_guard_demo.py` - Standalone demo showing guard usage in application code + +### Documentation +- `README.md` - This file + +## Requirements + +1. **ROS2** (tested with Humble/Iron) +2. **Nav2** stack installed +3. **sentor_guard** package built with BehaviorTree.CPP support +4. Topics published: + - `/robot_state` (std_msgs/String) - Current robot state + - `/autonomous_mode` (std_msgs/Bool) - Whether autonomous mode is enabled + - `/safety/heartbeat` (std_msgs/Bool) - Safety system heartbeat + - `/warning/heartbeat` (std_msgs/Bool) - Warning system heartbeat + +## Quick Start + +### 1. Build sentor_guard with BT support + +```bash +cd ~/ros2_ws +colcon build --packages-select sentor_guard +source install/setup.bash +``` + +If BehaviorTree.CPP is not found, install it: +```bash +sudo apt install ros-$ROS_DISTRO-behaviortree-cpp +``` + +### 2. Configure Nav2 to use sentor_guard plugin + +Add to your Nav2 parameters YAML file: + +```yaml +bt_navigator: + ros__parameters: + plugin_lib_names: + - nav2_compute_path_to_pose_action_bt_node + - nav2_follow_path_action_bt_node + # ... other Nav2 plugins ... + - sentor_guard_bt_nodes # Add this line + + default_nav_to_pose_bt_xml: /path/to/navigate_with_guard.xml +``` + +### 3. Use in your behavior tree + +Simple usage - check before navigation: +```xml + + + + + +``` + +Continuous monitoring during navigation: +```xml + + + + + + + +``` + +## CheckAutonomyAllowed Node Reference + +### Inputs (all optional with defaults) + +| Input | Type | Default | Description | +|-------|------|---------|-------------| +| `state_topic` | string | `/robot_state` | Topic publishing robot state | +| `mode_topic` | string | `/autonomous_mode` | Topic publishing autonomous mode | +| `safety_heartbeat_topic` | string | `/safety/heartbeat` | Safety heartbeat topic | +| `warning_heartbeat_topic` | string | `/warning/heartbeat` | Warning heartbeat topic | +| `required_state` | string | `active` | Required robot state for autonomy | +| `heartbeat_timeout` | int | `1000` | Heartbeat timeout in milliseconds | +| `require_autonomous_mode` | bool | `true` | Whether autonomous mode must be enabled | +| `require_safety_heartbeat` | bool | `true` | Whether safety heartbeat must be healthy | +| `require_warning_heartbeat` | bool | `true` | Whether warning heartbeat must be healthy | + +### Behavior + +The node returns: +- **SUCCESS** when all configured conditions are satisfied +- **FAILURE** when any condition is not met + +The node continuously evaluates conditions on each tick, enabling real-time response to safety state changes. + +## Usage Patterns + +### Pattern 1: Pre-Navigation Check + +Check conditions once before starting navigation: + +```xml + + + + +``` + +**Use when**: You want to prevent navigation from starting when unsafe. + +### Pattern 2: Continuous Monitoring + +Check conditions periodically during navigation: + +```xml + + + + + + +``` + +**Use when**: You want navigation to pause/resume based on safety state. + +### Pattern 3: Multi-Level Safety + +Check different safety levels at different points: + +```xml + + + + + + + + + + + + +``` + +**Use when**: You have different safety requirements for different phases. + +### Pattern 4: With Recovery Behaviors + +Combine with Nav2 recovery behaviors: + +```xml + + + + + + + + + + + + + + +``` + +**Use when**: You want standard Nav2 recovery combined with safety checks. + +## Integration with sentor + +The CheckAutonomyAllowed node integrates with the broader sentor ecosystem: + +1. **Sentor** monitors sensors and publishes heartbeats +2. **RobotStateMachine** manages robot state and autonomous mode +3. **sentor_guard** libraries provide reusable safety checking +4. **CheckAutonomyAllowed** BT node brings it into Nav2 behavior trees + +This creates a complete safety architecture for autonomous navigation. + +## Troubleshooting + +### Plugin not loading + +**Error**: `Failed to load behavior tree plugin sentor_guard_bt_nodes` + +**Solution**: +1. Verify package is built: `ros2 pkg list | grep sentor_guard` +2. Check plugin is installed: `ls $(ros2 pkg prefix sentor_guard)/lib/libsentor_guard_bt_nodes.so` +3. Source workspace: `source ~/ros2_ws/install/setup.bash` + +### Condition always fails + +**Error**: Navigation never starts, CheckAutonomyAllowed always returns FAILURE + +**Solution**: +1. Check topics are published: `ros2 topic echo /robot_state` +2. Verify values: State should be "active", mode should be true +3. Check heartbeats: `ros2 topic echo /safety/heartbeat` +4. Enable debug logging: `ros2 run nav2_bt_navigator bt_navigator --ros-args --log-level debug` + +### BehaviorTree.CPP not found during build + +**Error**: `Could not find a package configuration file provided by "behaviortree_cpp"` + +**Solution**: +```bash +sudo apt install ros-$ROS_DISTRO-behaviortree-cpp +# Or from source +cd ~/ros2_ws/src +git clone https://github.com/BehaviorTree/BehaviorTree.CPP.git +cd ~/ros2_ws +colcon build --packages-select behaviortree_cpp +``` + +## Testing + +### Test Scripts + +Two test scripts are provided to demonstrate and test the guard behavior: + +#### 1. Simple Guard Demo (`simple_guard_demo.py`) + +Demonstrates how the guard works in application code: + +```bash +# Terminal 1 - Run the demo +ros2 run sentor_guard simple_guard_demo.py + +# Terminal 2 - Publish safe conditions +ros2 topic pub /robot_state std_msgs/String "data: 'active'" -1 +ros2 topic pub /autonomous_mode std_msgs/Bool "data: true" -1 +ros2 topic pub /safety/heartbeat std_msgs/Bool "data: true" -r 2 +ros2 topic pub /warning/heartbeat std_msgs/Bool "data: true" -r 2 +``` + +This shows: +- How the guard checks conditions +- What happens when conditions are not met +- How navigation resumes when conditions are satisfied + +#### 2. BT Integration Test (`test_bt_integration.py`) + +Publishes a sequence of test conditions to verify guard behavior: + +```bash +# Run the test (it will publish various test conditions) +ros2 run sentor_guard test_bt_integration.py +``` + +This script: +- Publishes different combinations of conditions +- Demonstrates pause/resume scenarios +- Simulates realistic navigation conditions +- Useful for testing the CheckAutonomyAllowed BT node + +### Manual Testing + +Test the BT node behavior manually: + +```python +# test_bt_node.py +import rclpy +from rclpy.node import Node +from std_msgs.msg import String, Bool +import time + +node = rclpy.create_node('test_publisher') + +# Publishers +state_pub = node.create_publisher(String, '/robot_state', 10) +mode_pub = node.create_publisher(Bool, '/autonomous_mode', 10) +safety_pub = node.create_publisher(Bool, '/safety/heartbeat', 10) +warning_pub = node.create_publisher(Bool, '/warning/heartbeat', 10) + +# Publish safe conditions +state_msg = String() +state_msg.data = 'active' +state_pub.publish(state_msg) + +mode_msg = Bool() +mode_msg.data = True +mode_pub.publish(mode_msg) + +hb_msg = Bool() +hb_msg.data = True +safety_pub.publish(hb_msg) +warning_pub.publish(hb_msg) + +print("Publishing safe conditions...") +time.sleep(1.0) + +# Now test with unsafe conditions +state_msg.data = 'paused' +state_pub.publish(state_msg) +print("Changed to unsafe state") +``` + +## Further Reading + +- [Nav2 Behavior Trees](https://navigation.ros.org/behavior_trees/index.html) +- [BehaviorTree.CPP](https://www.behaviortree.dev/) +- [sentor_guard Documentation](../../README.md) +- [Integration Architecture](../../../ARCHITECTURE_INTEGRATION.md) + +## License + +MIT - Part of the LCAS sentor project diff --git a/src/sentor_guard/examples/nav2_examples/nav2_with_guard_launch.py b/src/sentor_guard/examples/nav2_examples/nav2_with_guard_launch.py new file mode 100644 index 0000000..9191a2e --- /dev/null +++ b/src/sentor_guard/examples/nav2_examples/nav2_with_guard_launch.py @@ -0,0 +1,88 @@ +""" +Example launch file for Nav2 with sentor_guard integration. + +This demonstrates how to configure Nav2 bt_navigator to use the sentor_guard +BT condition node. The key steps are: +1. Add sentor_guard_bt_nodes library to bt_navigator plugin list +2. Specify a behavior tree XML that uses CheckAutonomyAllowed +3. Ensure required topics are published (state, mode, heartbeats) + +This is a minimal example - in a real system you would include the full +Nav2 stack launch configuration. +""" + +import os +from ament_index_python.packages import get_package_share_directory +from launch import LaunchDescription +from launch_ros.actions import Node +from launch.actions import DeclareLaunchArgument +from launch.substitutions import LaunchConfiguration + + +def generate_launch_description(): + # Get package directories + sentor_guard_dir = get_package_share_directory('sentor_guard') + + # Declare launch arguments + behavior_tree_xml = LaunchConfiguration('behavior_tree') + declare_bt_xml_arg = DeclareLaunchArgument( + 'behavior_tree', + default_value=os.path.join( + sentor_guard_dir, 'examples', 'nav2_examples', 'navigate_with_guard.xml'), + description='Full path to behavior tree XML file' + ) + + # BT Navigator parameters + bt_navigator_params = { + 'default_nav_to_pose_bt_xml': behavior_tree_xml, + 'plugin_lib_names': [ + 'nav2_compute_path_to_pose_action_bt_node', + 'nav2_follow_path_action_bt_node', + 'nav2_back_up_action_bt_node', + 'nav2_spin_action_bt_node', + 'nav2_wait_action_bt_node', + 'nav2_clear_costmap_service_bt_node', + 'nav2_is_stuck_condition_bt_node', + 'nav2_goal_reached_condition_bt_node', + 'nav2_goal_updated_condition_bt_node', + 'nav2_rate_controller_bt_node', + 'nav2_distance_controller_bt_node', + 'nav2_speed_controller_bt_node', + 'nav2_truncate_path_action_bt_node', + 'nav2_recovery_node_bt_node', + 'nav2_pipeline_sequence_bt_node', + 'nav2_round_robin_node_bt_node', + 'nav2_transform_available_condition_bt_node', + 'nav2_time_expired_condition_bt_node', + 'nav2_distance_traveled_condition_bt_node', + # Add sentor_guard BT plugin + 'sentor_guard_bt_nodes', + ] + } + + # BT Navigator node + bt_navigator_node = Node( + package='nav2_bt_navigator', + executable='bt_navigator', + name='bt_navigator', + output='screen', + parameters=[bt_navigator_params], + ) + + # Example: Publish mock state and mode for testing + # In a real system, these would come from RobotStateMachine + mock_state_publisher = Node( + package='sentor_guard', + executable='python_guard_example.py', + name='mock_state_publisher', + output='screen', + parameters=[ + {'publish_mock_topics': True} + ] + ) + + return LaunchDescription([ + declare_bt_xml_arg, + bt_navigator_node, + # mock_state_publisher, # Uncomment for testing without real state machine + ]) diff --git a/src/sentor_guard/examples/nav2_examples/navigate_with_guard.xml b/src/sentor_guard/examples/nav2_examples/navigate_with_guard.xml new file mode 100644 index 0000000..1e6c362 --- /dev/null +++ b/src/sentor_guard/examples/nav2_examples/navigate_with_guard.xml @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/sentor_guard/examples/nav2_examples/simple_guard_demo.py b/src/sentor_guard/examples/nav2_examples/simple_guard_demo.py new file mode 100755 index 0000000..a0720b3 --- /dev/null +++ b/src/sentor_guard/examples/nav2_examples/simple_guard_demo.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 +""" +Simple demonstration of sentor_guard usage for Nav2 integration. + +This shows how the guard can be used in application code, similar to +how the CheckAutonomyAllowed BT node uses it internally. + +The pattern demonstrated here is what happens inside the behavior tree node, +but can also be used directly in application code for additional safety. + +Usage: + # Terminal 1 - Run this demo + ros2 run sentor_guard simple_guard_demo.py + + # Terminal 2 - Publish test conditions + ros2 topic pub /robot_state std_msgs/String "data: 'active'" -1 + ros2 topic pub /autonomous_mode std_msgs/Bool "data: true" -1 + ros2 topic pub /safety/heartbeat std_msgs/Bool "data: true" -r 2 + ros2 topic pub /warning/heartbeat std_msgs/Bool "data: true" -r 2 +""" + +import rclpy +from rclpy.node import Node +from sentor_guard import SentorGuard +import time + + +class SimpleGuardDemo(Node): + """Demo node showing guard usage.""" + + def __init__(self): + super().__init__('simple_guard_demo') + + # Create a guard with default settings + self.guard = SentorGuard( + self, + heartbeat_timeout=2.0, # Allow 2 seconds for heartbeat freshness + required_state='active' + ) + + self.get_logger().info('Simple Guard Demo started') + self.get_logger().info('Waiting for safety conditions to be met...') + self.get_logger().info('') + self.get_logger().info('Required conditions:') + self.get_logger().info(' - /robot_state == "active"') + self.get_logger().info(' - /autonomous_mode == true') + self.get_logger().info(' - /safety/heartbeat == true (fresh)') + self.get_logger().info(' - /warning/heartbeat == true (fresh)') + self.get_logger().info('') + self.get_logger().info('Publish these topics to allow navigation...') + + def check_and_navigate(self): + """Simulate navigation with guard checking.""" + + # Check if autonomy is allowed (non-blocking) + if self.guard.is_autonomy_allowed(): + self.get_logger().info('✓ Autonomy allowed - Navigation would proceed') + return True + else: + reason = self.guard.get_blocking_reason() + self.get_logger().warn(f'✗ Autonomy blocked: {reason}') + return False + + def simulate_navigation(self): + """Simulate a navigation task with continuous checking.""" + + self.get_logger().info('\n=== Simulating Navigation Task ===') + self.get_logger().info('This demonstrates continuous safety checking...\n') + + # Simulate navigation loop + for waypoint in range(1, 6): + self.get_logger().info(f'Navigating to waypoint {waypoint}/5...') + + # In real Nav2, this check happens in the behavior tree + # via the CheckAutonomyAllowed condition node + if not self.guard.is_autonomy_allowed(): + reason = self.guard.get_blocking_reason() + self.get_logger().error(f'Navigation paused: {reason}') + self.get_logger().info('Waiting for conditions to be satisfied...') + + # Wait for conditions with timeout + if self.guard.wait_for_autonomy(timeout=5.0): + self.get_logger().info('Conditions satisfied - Resuming navigation') + else: + self.get_logger().error('Timeout waiting for conditions - Aborting') + return False + + # Simulate navigation progress + time.sleep(1.0) + rclpy.spin_once(self, timeout_sec=0.1) + + self.get_logger().info('✓ Navigation complete!') + return True + + +def main(args=None): + """Main entry point.""" + rclpy.init(args=args) + + demo = SimpleGuardDemo() + + try: + # Create a rate for periodic checking + rate = demo.create_rate(1.0) # 1 Hz + + navigation_started = False + + while rclpy.ok(): + # Periodically check and report status + if demo.check_and_navigate(): + if not navigation_started: + # Once conditions are met, simulate a navigation task + navigation_started = True + demo.simulate_navigation() + demo.get_logger().info('\nReturning to monitoring mode...\n') + + rclpy.spin_once(demo, timeout_sec=0.1) + rate.sleep() + + except KeyboardInterrupt: + demo.get_logger().info('Demo stopped by user') + finally: + demo.destroy_node() + rclpy.shutdown() + + +if __name__ == '__main__': + main() diff --git a/src/sentor_guard/examples/nav2_examples/simple_nav_with_guard.xml b/src/sentor_guard/examples/nav2_examples/simple_nav_with_guard.xml new file mode 100644 index 0000000..b31af40 --- /dev/null +++ b/src/sentor_guard/examples/nav2_examples/simple_nav_with_guard.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + diff --git a/src/sentor_guard/examples/nav2_examples/test_bt_integration.py b/src/sentor_guard/examples/nav2_examples/test_bt_integration.py new file mode 100755 index 0000000..2878256 --- /dev/null +++ b/src/sentor_guard/examples/nav2_examples/test_bt_integration.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +""" +Test script to demonstrate the CheckAutonomyAllowed BT condition node. + +This script simulates the behavior tree node by publishing test conditions +and verifying that the guard responds correctly. + +Usage: + ros2 run sentor_guard test_bt_integration.py +""" + +import rclpy +from rclpy.node import Node +from std_msgs.msg import String, Bool +import time + + +class BTIntegrationTest(Node): + """Test node for demonstrating BT integration.""" + + def __init__(self): + super().__init__('bt_integration_test') + + # Create publishers for test conditions + self.state_pub = self.create_publisher(String, '/robot_state', 10) + self.mode_pub = self.create_publisher(Bool, '/autonomous_mode', 10) + self.safety_pub = self.create_publisher(Bool, '/safety/heartbeat', 10) + self.warning_pub = self.create_publisher(Bool, '/warning/heartbeat', 10) + + self.get_logger().info('BT Integration Test Node started') + self.get_logger().info('Publishing test conditions...') + + def publish_conditions(self, state='active', mode=True, safety=True, warning=True): + """Publish a set of conditions.""" + state_msg = String() + state_msg.data = state + self.state_pub.publish(state_msg) + + mode_msg = Bool() + mode_msg.data = mode + self.mode_pub.publish(mode_msg) + + safety_msg = Bool() + safety_msg.data = safety + self.safety_pub.publish(safety_msg) + + warning_msg = Bool() + warning_msg.data = warning + self.warning_pub.publish(warning_msg) + + self.get_logger().info( + f'Published: state={state}, mode={mode}, ' + f'safety={safety}, warning={warning}' + ) + + def run_test_sequence(self): + """Run a sequence of test conditions.""" + + # Test 1: All conditions met (autonomy allowed) + self.get_logger().info('\n=== Test 1: All conditions met ===') + self.publish_conditions('active', True, True, True) + time.sleep(1.0) + + # Test 2: Wrong state (autonomy blocked) + self.get_logger().info('\n=== Test 2: State is paused ===') + self.publish_conditions('paused', True, True, True) + time.sleep(1.0) + + # Test 3: Autonomous mode disabled (autonomy blocked) + self.get_logger().info('\n=== Test 3: Autonomous mode disabled ===') + self.publish_conditions('active', False, True, True) + time.sleep(1.0) + + # Test 4: Safety heartbeat unhealthy (autonomy blocked) + self.get_logger().info('\n=== Test 4: Safety heartbeat unhealthy ===') + self.publish_conditions('active', True, False, True) + time.sleep(1.0) + + # Test 5: Warning heartbeat unhealthy (autonomy blocked) + self.get_logger().info('\n=== Test 5: Warning heartbeat unhealthy ===') + self.publish_conditions('active', True, True, False) + time.sleep(1.0) + + # Test 6: Return to safe conditions (autonomy allowed) + self.get_logger().info('\n=== Test 6: Return to safe conditions ===') + self.publish_conditions('active', True, True, True) + time.sleep(1.0) + + # Test 7: Simulate navigation scenario - pause and resume + self.get_logger().info('\n=== Test 7: Simulating pause/resume during navigation ===') + for i in range(3): + self.get_logger().info(f' Navigation cycle {i+1}/3') + self.publish_conditions('active', True, True, True) + time.sleep(0.5) + + self.get_logger().info(' -> Pausing (obstacle detected)') + self.publish_conditions('paused', True, True, True) + time.sleep(0.5) + + self.get_logger().info(' -> Resuming (obstacle cleared)') + + self.publish_conditions('active', True, True, True) + + self.get_logger().info('\n=== Test sequence complete ===') + self.get_logger().info('If CheckAutonomyAllowed is running in a behavior tree,') + self.get_logger().info('it would have returned SUCCESS/FAILURE based on these conditions.') + + +def main(args=None): + """Main entry point.""" + rclpy.init(args=args) + + test_node = BTIntegrationTest() + + try: + # Run the test sequence + test_node.run_test_sequence() + + # Keep publishing the final safe state + rate = test_node.create_rate(1.0) + test_node.get_logger().info('\nPublishing safe conditions at 1Hz...') + test_node.get_logger().info('Press Ctrl+C to stop') + + while rclpy.ok(): + test_node.publish_conditions('active', True, True, True) + rclpy.spin_once(test_node, timeout_sec=0.1) + rate.sleep() + + except KeyboardInterrupt: + test_node.get_logger().info('Test interrupted by user') + finally: + test_node.destroy_node() + rclpy.shutdown() + + +if __name__ == '__main__': + main() diff --git a/src/sentor_guard/examples/python_guard_example.py b/src/sentor_guard/examples/python_guard_example.py new file mode 100755 index 0000000..aaa6da5 --- /dev/null +++ b/src/sentor_guard/examples/python_guard_example.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python3 +"""Example usage of Python SentorGuard.""" + +import rclpy +from rclpy.node import Node +from geometry_msgs.msg import Twist +from sentor_guard import SentorGuard, AutonomyGuardException, sentor_guarded + + +class GuardedNavigationNode(Node): + """Example node demonstrating SentorGuard usage patterns.""" + + def __init__(self): + super().__init__('guarded_navigation_example') + + # Initialize guard + # Monitors /robot_state and /autonomous_mode from RobotStateMachine + self.guard = SentorGuard( + self, + required_state='active', + update_timeout=1.0, + require_autonomous_mode=True + ) + + # Create publisher + self.cmd_vel_pub = self.create_publisher(Twist, '/cmd_vel', 10) + + # Create timer for periodic action + self.timer = self.create_timer(0.1, self.timer_callback) + + self.get_logger().info("Guarded navigation node started") + + def timer_callback(self): + """Periodic callback - only publishes if guard allows.""" + if self.guard.is_autonomy_allowed(): + # Autonomy allowed, publish command + msg = Twist() + msg.linear.x = 0.5 + self.cmd_vel_pub.publish(msg) + self.get_logger().debug("Published cmd_vel") + else: + # Not allowed, log reason + reason = self.guard.get_blocking_reason() + self.get_logger().debug(f"Navigation blocked: {reason}") + + def execute_mission(self): + """Execute a mission with timeout.""" + try: + # Wait up to 10 seconds for autonomy + self.guard.guarded_wait(timeout=10.0) + + # Proceed with mission + self.get_logger().info("Starting mission") + self.run_mission_steps() + + except AutonomyGuardException as e: + self.get_logger().error(f"Mission aborted: {e}") + + def run_mission_steps(self): + """Placeholder for mission execution.""" + self.get_logger().info("Running mission steps...") + + def critical_action(self): + """Critical action that must wait indefinitely.""" + with self.guard: + self.get_logger().info("Executing critical action") + # This code only runs when guard conditions are met + self.perform_action() + + def perform_action(self): + """Placeholder for action execution.""" + self.get_logger().info("Performing action...") + + @sentor_guarded() + def decorator_example_no_timeout(self): + """Example using decorator without timeout (waits indefinitely).""" + self.get_logger().info("Decorator example: executing guarded action") + msg = Twist() + msg.linear.x = 1.0 + self.cmd_vel_pub.publish(msg) + + @sentor_guarded(timeout=5.0) + def decorator_example_with_timeout(self): + """Example using decorator with timeout.""" + self.get_logger().info("Decorator example with timeout: executing guarded action") + msg = Twist() + msg.linear.x = 0.8 + self.cmd_vel_pub.publish(msg) + + +def main(): + """Main entry point.""" + rclpy.init() + node = GuardedNavigationNode() + + try: + rclpy.spin(node) + except KeyboardInterrupt: + pass + finally: + node.destroy_node() + rclpy.shutdown() + + +if __name__ == '__main__': + main() diff --git a/src/sentor_guard/include/sentor_guard/bt_condition_node.hpp b/src/sentor_guard/include/sentor_guard/bt_condition_node.hpp new file mode 100644 index 0000000..d7d7816 --- /dev/null +++ b/src/sentor_guard/include/sentor_guard/bt_condition_node.hpp @@ -0,0 +1,95 @@ +#pragma once + +#include +#include +#include + +#include "behaviortree_cpp/condition_node.h" +#include "rclcpp/rclcpp.hpp" +#include "sentor_guard/guard.hpp" + +namespace sentor_guard +{ + +/** + * @brief BehaviorTree condition node that checks sentor guard status + * + * This condition node continuously checks if autonomy is allowed based on + * RobotStateMachine state and mode. It returns SUCCESS when autonomy is + * allowed and FAILURE otherwise. + * + * This enables behavior trees to respond to safety conditions in real-time, + * allowing navigation to pause when conditions are not met and resume when + * they are satisfied again. + * + * Example usage in BT XML: + * @code{.xml} + * + * @endcode + */ +class CheckAutonomyAllowed : public BT::ConditionNode +{ +public: + /** + * @brief Constructor for the BT condition node + * + * @param name Name of the node in the behavior tree + * @param config Configuration object containing node parameters + * @param node ROS2 node pointer for subscriptions + */ + CheckAutonomyAllowed( + const std::string & name, + const BT::NodeConfiguration & config, + rclcpp::Node::SharedPtr node); + + /** + * @brief Provides the list of ports (inputs/outputs) for this BT node + */ + static BT::PortsList providedPorts(); + + /** + * @brief Called when the node is executed by the behavior tree + * + * @return SUCCESS if autonomy is allowed, FAILURE otherwise + */ + BT::NodeStatus tick() override; + +private: + rclcpp::Node::SharedPtr node_; + std::shared_ptr guard_; + + // Configuration + std::string state_topic_; + std::string mode_topic_; + std::string required_state_; + std::chrono::milliseconds update_timeout_; + bool require_autonomous_mode_; +}; + +/** + * @brief Factory function for creating CheckAutonomyAllowed nodes + * + * This is used by the BehaviorTree.CPP factory to create instances of the node. + * The node pointer must be passed via the blackboard with key "node". + */ +inline std::unique_ptr CheckAutonomyAllowedFactory( + const std::string & name, + const BT::NodeConfiguration & config) +{ + rclcpp::Node::SharedPtr node; + if (!config.blackboard->get("node", node)) { + throw std::runtime_error("Failed to get 'node' from blackboard"); + } + + if (!node) { + throw BT::RuntimeError("CheckAutonomyAllowed requires a ROS2 node in blackboard"); + } + + return std::make_unique(name, config, node); +} + +} // namespace sentor_guard diff --git a/src/sentor_guard/include/sentor_guard/guard.hpp b/src/sentor_guard/include/sentor_guard/guard.hpp new file mode 100644 index 0000000..d7fc015 --- /dev/null +++ b/src/sentor_guard/include/sentor_guard/guard.hpp @@ -0,0 +1,147 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sentor_msgs/msg/guard_status.hpp" + +namespace sentor_guard { + +/** + * @brief Exception thrown when autonomy guard conditions are not met + */ +class AutonomyGuardException : public std::runtime_error { +public: + explicit AutonomyGuardException(const std::string& message) + : std::runtime_error(message) {} +}; + +/** + * @brief Guard that checks robot state and autonomous mode before allowing execution + * + * Monitors /robot_state and /autonomous_mode topics from RobotStateMachine. + * The guard ensures these messages are recent (within update_timeout). + * + * Can be used with RAII pattern via the Guard nested class. + */ +class SentorGuard { +public: + /** + * @brief Configuration options for the guard + */ + struct Options { + std::string state_topic = "/robot_state"; + std::string mode_topic = "/autonomous_mode"; + std::chrono::milliseconds update_timeout{1000}; + std::string required_state = "active"; + bool require_autonomous_mode = true; + }; + + /** + * @brief Construct a new Sentor Guard object with default options + * + * @param node ROS2 node to use for subscriptions + */ + explicit SentorGuard(rclcpp::Node::SharedPtr node); + + /** + * @brief Construct a new Sentor Guard object + * + * @param node ROS2 node to use for subscriptions + * @param options Configuration options + */ + SentorGuard(rclcpp::Node::SharedPtr node, const Options& options); + + ~SentorGuard() = default; + + /** + * @brief Check if autonomy is currently allowed (non-blocking) + * + * @return true if all guard conditions are satisfied + */ + bool isAutonomyAllowed(); + + /** + * @brief Get reason why autonomy is blocked + * + * @return String describing why autonomy is not allowed + */ + std::string getBlockingReason() const; + + /** + * @brief Wait until autonomy is allowed + * + * @param timeout Maximum time to wait (zero means indefinite) + * @return true if autonomy is allowed, false if timeout occurred + */ + bool waitForAutonomy(std::chrono::milliseconds timeout = std::chrono::milliseconds::zero()); + + /** + * @brief Wait with exception on timeout + * + * @param timeout Maximum time to wait (zero means indefinite) + * @throws AutonomyGuardException if timeout occurs + */ + void guardedWait(std::chrono::milliseconds timeout = std::chrono::milliseconds::zero()); + + /** + * @brief RAII guard class for automatic safety checking + * + * Usage: + * SentorGuard::Guard guard(my_guard); + * // Code here only executes when safe + */ + class Guard { + public: + explicit Guard(SentorGuard& guard, std::chrono::milliseconds timeout = std::chrono::milliseconds::zero()) + : guard_(guard) { + guard_.guardedWait(timeout); + } + ~Guard() = default; + + Guard(const Guard&) = delete; + Guard& operator=(const Guard&) = delete; + + private: + SentorGuard& guard_; + }; + +private: + void stateCallback(const std_msgs::msg::String::SharedPtr msg); + void modeCallback(const std_msgs::msg::Bool::SharedPtr msg); + + void checkConditions(); + void publishBlockingStatus(bool is_blocking, const std::vector& call_stack = {}); + std::vector getTruncatedCallStack(int max_frames = 10); + + rclcpp::Node::SharedPtr node_; + Options options_; + + rclcpp::Subscription::SharedPtr state_sub_; + rclcpp::Subscription::SharedPtr mode_sub_; + rclcpp::Publisher::SharedPtr status_publisher_; + + mutable std::mutex mutex_; + std::condition_variable cv_; + + std::string current_state_; + bool autonomous_mode_{false}; + rclcpp::Time last_state_time_; + rclcpp::Time last_mode_time_; + bool condition_met_{false}; + + // Blocking status tracking + bool is_currently_blocking_{false}; + rclcpp::Time blocking_start_time_; + std::vector blocking_call_stack_; +}; + +} // namespace sentor_guard diff --git a/src/sentor_guard/launch/guard_example.launch.py b/src/sentor_guard/launch/guard_example.launch.py new file mode 100644 index 0000000..473ce86 --- /dev/null +++ b/src/sentor_guard/launch/guard_example.launch.py @@ -0,0 +1,54 @@ +"""Example launch file for sentor_guard.""" + +from launch import LaunchDescription +from launch_ros.actions import Node +from launch.actions import DeclareLaunchArgument +from launch.substitutions import LaunchConfiguration +from ament_index_python.packages import get_package_share_directory +import os + + +def generate_launch_description(): + """Generate launch description for sentor_guard example.""" + pkg_share = get_package_share_directory('sentor_guard') + + guard_params_file = os.path.join(pkg_share, 'config', 'guard_params.yaml') + + return LaunchDescription([ + DeclareLaunchArgument( + 'guard_params', + default_value=guard_params_file, + description='Path to guard parameters file' + ), + + # Launch lifecycle guard node + Node( + package='sentor_guard', + executable='lifecycle_guard_node', + name='lifecycle_guard', + output='screen', + parameters=[ + LaunchConfiguration('guard_params'), + { + 'managed_nodes': ['/controller_server', '/planner_server'], + 'check_rate': 10.0, + } + ], + ), + + # Example: Topic guard for cmd_vel + Node( + package='sentor_guard', + executable='topic_guard_node', + name='cmd_vel_guard', + output='screen', + parameters=[ + LaunchConfiguration('guard_params'), + { + 'input_topic': '/nav2/cmd_vel', + 'output_topic': '/cmd_vel', + 'message_type': 'geometry_msgs/msg/Twist', + } + ], + ), + ]) diff --git a/src/sentor_guard/package.xml b/src/sentor_guard/package.xml new file mode 100644 index 0000000..a5dcf01 --- /dev/null +++ b/src/sentor_guard/package.xml @@ -0,0 +1,47 @@ + + + + sentor_guard + 0.1.0 + Safety guard libraries and nodes for sentor-based autonomous systems + + Your Name + MIT + + + ament_cmake + ament_cmake_python + rosidl_default_generators + + + rclcpp + rclpy + std_msgs + sentor_msgs + lifecycle_msgs + builtin_interfaces + sentor + + + rosidl_default_runtime + rosidl_interface_packages + + + behaviortree_cpp + + + python3-yaml + + + nav2_bt_navigator + + + ament_lint_auto + ament_lint_common + ament_cmake_pytest + ament_cmake_gtest + + + ament_cmake + + diff --git a/src/sentor_guard/resource/sentor_guard b/src/sentor_guard/resource/sentor_guard new file mode 100644 index 0000000..e69de29 diff --git a/src/sentor_guard/sentor_guard/__init__.py b/src/sentor_guard/sentor_guard/__init__.py new file mode 100644 index 0000000..0eae896 --- /dev/null +++ b/src/sentor_guard/sentor_guard/__init__.py @@ -0,0 +1,5 @@ +"""Sentor Guard package for safe autonomous operations.""" + +from .guard import SentorGuard, AutonomyGuardException, sentor_guarded + +__all__ = ['SentorGuard', 'AutonomyGuardException', 'sentor_guarded'] diff --git a/src/sentor_guard/sentor_guard/guard.py b/src/sentor_guard/sentor_guard/guard.py new file mode 100644 index 0000000..29cabb1 --- /dev/null +++ b/src/sentor_guard/sentor_guard/guard.py @@ -0,0 +1,417 @@ +"""Python implementation of SentorGuard for safe autonomous operations.""" + +import rclpy +from rclpy.node import Node +from rclpy.time import Time, Duration +from std_msgs.msg import String, Bool +from threading import Event, Lock +import time +import traceback + + +class AutonomyGuardException(Exception): + """Raised when autonomy guard conditions are not met within timeout.""" + pass + + +class SentorGuard: + """ + Guard that checks robot state and autonomous mode before allowing execution. + + Monitors /robot_state and /autonomous_mode topics from RobotStateMachine. + The guard ensures these messages are recent (within update_timeout). + + Can be used as a context manager or called directly. + + Example: + # As context manager + with SentorGuard(node): + execute_autonomous_action() + + # With explicit timeout + guard = SentorGuard(node) + if guard.wait_for_autonomy(timeout=5.0): + execute_autonomous_action() + """ + + def __init__(self, node: Node, + state_topic: str = '/robot_state', + mode_topic: str = '/autonomous_mode', + update_timeout: float = 1.0, + required_state: str = 'active', + require_autonomous_mode: bool = True): + """ + Initialize the guard. + + Args: + node: ROS2 node to use for subscriptions + state_topic: Topic publishing robot state (String: 'start-up', 'disabled', 'enabled', 'active') + mode_topic: Topic publishing autonomous mode flag (Bool) + update_timeout: Maximum age of state/mode messages in seconds + required_state: State required for autonomy (default: 'active') + require_autonomous_mode: Whether autonomous mode must be true (default: True) + """ + self.node = node + self.update_timeout = Duration(seconds=update_timeout) + self.required_state = required_state + self.require_autonomous_mode = require_autonomous_mode + + self._lock = Lock() + self._current_state = None + self._autonomous_mode = None + self._last_state_time = None + self._last_mode_time = None + self._condition_met = Event() + + # Tracking for blocking status + self._is_currently_blocking = False + self._blocking_start_time = None + self._blocking_call_stack = None + + # Subscribe to state and mode topics + self._state_sub = node.create_subscription( + String, + state_topic, + self._state_callback, + 10 + ) + + self._mode_sub = node.create_subscription( + Bool, + mode_topic, + self._mode_callback, + 10 + ) + + # Publisher for blocking status using GuardStatus message + try: + from sentor_msgs.msg import GuardStatus + self._status_publisher = node.create_publisher( + GuardStatus, + '/sentor_guard/blocking_reason', + 10 + ) + self._GuardStatus = GuardStatus + except ImportError: + self.node.get_logger().warn( + "GuardStatus message not available - blocking status will not be published" + ) + self._status_publisher = None + self._GuardStatus = None + + self.node.get_logger().info( + f"SentorGuard initialized: required_state='{required_state}', " + f"update_timeout={update_timeout}s" + ) + + def _state_callback(self, msg): + """Handle robot state updates.""" + with self._lock: + self._current_state = msg.data + self._last_state_time = self.node.get_clock().now() + self._check_conditions() + + def _mode_callback(self, msg): + """Handle autonomous mode updates.""" + with self._lock: + self._autonomous_mode = msg.data + self._last_mode_time = self.node.get_clock().now() + self._check_conditions() + + def _check_conditions(self): + """Check if all conditions are met.""" + now = self.node.get_clock().now() + + # Check if we have received state message + if self._current_state is None or self._last_state_time is None: + self._condition_met.clear() + return + + # Check if state message is recent + state_age = now - self._last_state_time + if state_age > self.update_timeout: + self._condition_met.clear() + return + + # Check state value + if self._current_state != self.required_state: + self._condition_met.clear() + return + + # Check if we have received mode message + if self._autonomous_mode is None or self._last_mode_time is None: + self._condition_met.clear() + return + + # Check if mode message is recent + mode_age = now - self._last_mode_time + if mode_age > self.update_timeout: + self._condition_met.clear() + return + + # Check autonomous mode + if self.require_autonomous_mode and not self._autonomous_mode: + self._condition_met.clear() + return + + # All conditions met + self._condition_met.set() + + def is_autonomy_allowed(self) -> bool: + """ + Check if autonomy is currently allowed (non-blocking). + + Returns: + True if all guard conditions are satisfied, False otherwise + """ + with self._lock: + self._check_conditions() # Recheck heartbeat age + return self._condition_met.is_set() + + def get_blocking_reason(self) -> str: + """ + Get human-readable reason why autonomy is blocked. + + Returns: + String describing why autonomy is not allowed + """ + with self._lock: + now = self.node.get_clock().now() + + # Check state + if self._current_state is None or self._last_state_time is None: + return "Robot state not received" + + state_age = now - self._last_state_time + if state_age > self.update_timeout: + return f"Robot state stale ({state_age.nanoseconds / 1e9:.2f}s old)" + + if self._current_state != self.required_state: + return f"State is '{self._current_state}', required '{self.required_state}'" + + # Check mode + if self._autonomous_mode is None or self._last_mode_time is None: + return "Autonomous mode not received" + + mode_age = now - self._last_mode_time + if mode_age > self.update_timeout: + return f"Autonomous mode stale ({mode_age.nanoseconds / 1e9:.2f}s old)" + + if self.require_autonomous_mode and not self._autonomous_mode: + return "Autonomous mode is disabled" + + return "Unknown reason" + + def _publish_blocking_status(self, is_blocking: bool, call_stack: list = None): + """ + Publish blocking status message. + + Args: + is_blocking: True if guard is blocking, False if it just passed + call_stack: List of call stack frames (optional) + """ + if self._status_publisher is None or self._GuardStatus is None: + return + + try: + from builtin_interfaces.msg import Time as TimeMsg + + msg = self._GuardStatus() + msg.node_name = self.node.get_name() + msg.is_blocking = is_blocking + + if is_blocking: + msg.blocking_reason = self.get_blocking_reason() + msg.call_stack = call_stack if call_stack else [] + now = self.node.get_clock().now() + msg.blocked_at = TimeMsg( + sec=now.seconds_nanoseconds()[0], + nanosec=now.seconds_nanoseconds()[1] + ) + msg.blocked_duration = 0.0 + else: + # Guard is passing after being blocked + msg.blocking_reason = "" + msg.call_stack = [] + if self._blocking_start_time: + now = self.node.get_clock().now() + duration = now - self._blocking_start_time + msg.blocked_duration = duration.nanoseconds / 1e9 + msg.blocked_at = TimeMsg( + sec=self._blocking_start_time.seconds_nanoseconds()[0], + nanosec=self._blocking_start_time.seconds_nanoseconds()[1] + ) + else: + msg.blocked_duration = 0.0 + msg.blocked_at = TimeMsg() + + self._status_publisher.publish(msg) + except Exception as e: + self.node.get_logger().warn(f"Failed to publish blocking status: {e}") + + def _get_truncated_call_stack(self, max_frames: int = 10) -> list: + """ + Get truncated call stack as list of strings. + + Args: + max_frames: Maximum number of frames to include + + Returns: + List of strings representing call stack frames + """ + stack = traceback.extract_stack() + # Skip the last few frames (this function and the guard methods) + stack = stack[:-3] + # Take only the last max_frames + stack = stack[-max_frames:] + + result = [] + for frame in stack: + result.append(f"{frame.filename}:{frame.lineno} in {frame.name}") + + return result + + def wait_for_autonomy(self, timeout: float = None) -> bool: + """ + Wait until autonomy is allowed. + + Args: + timeout: Maximum time to wait in seconds. None for indefinite. + + Returns: + True if autonomy is allowed, False if timeout occurred + """ + start_time = time.time() + first_block = True + + while rclpy.ok(): + is_allowed = self.is_autonomy_allowed() + + if is_allowed: + # If we were previously blocking, publish that we're now passing + if self._is_currently_blocking: + self._publish_blocking_status(is_blocking=False) + self._is_currently_blocking = False + self._blocking_start_time = None + self._blocking_call_stack = None + return True + + # We're blocking - publish status on first block + if first_block: + first_block = False + self._is_currently_blocking = True + self._blocking_start_time = self.node.get_clock().now() + self._blocking_call_stack = self._get_truncated_call_stack() + self._publish_blocking_status( + is_blocking=True, + call_stack=self._blocking_call_stack + ) + + if timeout is not None: + elapsed = time.time() - start_time + if elapsed >= timeout: + reason = self.get_blocking_reason() + self.node.get_logger().warn( + f"Autonomy not granted within {timeout}s: {reason}" + ) + return False + + # Spin once to process callbacks + rclpy.spin_once(self.node, timeout_sec=0.1) + + return False + + def __enter__(self): + """Context manager entry - waits indefinitely by default.""" + if not self.wait_for_autonomy(): + raise AutonomyGuardException("Autonomy not allowed and node is shutting down") + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Context manager exit.""" + return False + + def guarded_wait(self, timeout: float = None): + """ + Wait for autonomy with timeout, raising exception on failure. + + Args: + timeout: Maximum time to wait. None for indefinite. + + Raises: + AutonomyGuardException: If timeout occurs + """ + if not self.wait_for_autonomy(timeout): + reason = self.get_blocking_reason() + if timeout: + raise AutonomyGuardException( + f"Autonomy not granted within {timeout}s timeout: {reason}" + ) + else: + raise AutonomyGuardException( + f"Autonomy not allowed: {reason}" + ) + + +def sentor_guarded(guard: SentorGuard = None, timeout: float = None): + """ + Decorator that ensures a function only executes when guard conditions are met. + + This decorator can be used in two ways: + 1. With a guard instance as an argument + 2. As a method decorator in a class that has a 'guard' attribute + + Args: + guard: SentorGuard instance to check (optional if used on class methods) + timeout: Maximum time to wait for guard conditions (None = indefinite) + + Example: + # Using with explicit guard + @sentor_guarded(guard=my_guard, timeout=5.0) + def autonomous_action(): + execute_navigation() + + # Using as method decorator (class must have self.guard) + class MyNode(Node): + def __init__(self): + self.guard = SentorGuard(self) + + @sentor_guarded() + def autonomous_action(self): + execute_navigation() + + Raises: + AutonomyGuardException: If guard conditions are not met within timeout + """ + def decorator(func): + def wrapper(*args, **kwargs): + # Try to get guard from arguments + guard_instance = guard + + # If no guard provided, try to get it from self (for class methods) + if guard_instance is None and len(args) > 0: + # Check if first argument (self) has a guard attribute + if hasattr(args[0], 'guard'): + guard_instance = args[0].guard + + # If still no guard, raise an error + if guard_instance is None: + raise ValueError( + "sentor_guarded decorator requires either a 'guard' parameter " + "or to be used on a method of a class with a 'guard' attribute" + ) + + # Check if guard conditions are met + if not isinstance(guard_instance, SentorGuard): + raise TypeError( + f"Expected SentorGuard instance, got {type(guard_instance)}" + ) + + # Wait for autonomy with the specified timeout + guard_instance.guarded_wait(timeout=timeout) + + # Execute the function if guard conditions are met + return func(*args, **kwargs) + + return wrapper + return decorator diff --git a/src/sentor_guard/sentor_guard_bt_nodes.xml b/src/sentor_guard/sentor_guard_bt_nodes.xml new file mode 100644 index 0000000..e9a1522 --- /dev/null +++ b/src/sentor_guard/sentor_guard_bt_nodes.xml @@ -0,0 +1,11 @@ + + + + + Checks if autonomous operation is allowed based on RobotStateMachine state and mode. + Returns SUCCESS when all conditions are met (robot state active, autonomous mode true), + FAILURE otherwise. Continuously monitors conditions enabling dynamic behavior. + + + diff --git a/src/sentor_guard/setup.cfg b/src/sentor_guard/setup.cfg new file mode 100644 index 0000000..5919ad1 --- /dev/null +++ b/src/sentor_guard/setup.cfg @@ -0,0 +1,5 @@ +[develop] +script_dir=$base/lib/sentor_guard + +[install] +install_scripts=$base/lib/sentor_guard diff --git a/src/sentor_guard/setup.py b/src/sentor_guard/setup.py new file mode 100644 index 0000000..cb91ca3 --- /dev/null +++ b/src/sentor_guard/setup.py @@ -0,0 +1,26 @@ +from setuptools import setup + +package_name = 'sentor_guard' + +setup( + name=package_name, + version='0.1.0', + packages=[package_name], + data_files=[ + ('share/ament_index/resource_index/packages', + ['resource/' + package_name]), + ('share/' + package_name, ['package.xml']), + ], + install_requires=['setuptools'], + zip_safe=True, + maintainer='Your Name', + maintainer_email='you@example.com', + description='Safety guard libraries and nodes for sentor-based autonomous systems', + license='MIT', + tests_require=['pytest'], + entry_points={ + 'console_scripts': [ + 'python_guard_example = examples.python_guard_example:main', + ], + }, +) diff --git a/src/sentor_guard/src/bt_condition_node.cpp b/src/sentor_guard/src/bt_condition_node.cpp new file mode 100644 index 0000000..8ab9c26 --- /dev/null +++ b/src/sentor_guard/src/bt_condition_node.cpp @@ -0,0 +1,82 @@ +#include "sentor_guard/bt_condition_node.hpp" + +namespace sentor_guard +{ + +CheckAutonomyAllowed::CheckAutonomyAllowed( + const std::string & name, + const BT::NodeConfiguration & config, + rclcpp::Node::SharedPtr node) +: BT::ConditionNode(name, config), + node_(node) +{ + // Get configuration from ports (or use defaults) + getInput("state_topic", state_topic_); + getInput("mode_topic", mode_topic_); + getInput("required_state", required_state_); + + // Get timeout in milliseconds + int timeout_ms; + if (getInput("update_timeout", timeout_ms)) { + update_timeout_ = std::chrono::milliseconds(timeout_ms); + } else { + update_timeout_ = std::chrono::milliseconds(1000); + } + + // Get optional boolean flag + getInput("require_autonomous_mode", require_autonomous_mode_); + + // Create guard with configuration + SentorGuard::Options options; + + if (!state_topic_.empty()) { + options.state_topic = state_topic_; + } + if (!mode_topic_.empty()) { + options.mode_topic = mode_topic_; + } + if (!required_state_.empty()) { + options.required_state = required_state_; + } + + options.update_timeout = update_timeout_; + options.require_autonomous_mode = require_autonomous_mode_; + + guard_ = std::make_shared(node_, options); + + RCLCPP_INFO(node_->get_logger(), + "CheckAutonomyAllowed BT node initialized: required_state='%s'", + options.required_state.c_str()); +} + +BT::PortsList CheckAutonomyAllowed::providedPorts() +{ + return { + BT::InputPort("state_topic", "/robot_state", + "Topic publishing robot state"), + BT::InputPort("mode_topic", "/autonomous_mode", + "Topic publishing autonomous mode"), + BT::InputPort("required_state", "active", + "Required robot state for autonomy"), + BT::InputPort("update_timeout", 1000, + "Message update timeout in milliseconds"), + BT::InputPort("require_autonomous_mode", true, + "Whether autonomous mode must be enabled") + }; +} + +BT::NodeStatus CheckAutonomyAllowed::tick() +{ + // Check if autonomy is currently allowed + if (guard_->isAutonomyAllowed()) { + return BT::NodeStatus::SUCCESS; + } else { + // Log reason for debugging (at debug level to avoid spam) + std::string reason = guard_->getBlockingReason(); + RCLCPP_DEBUG(node_->get_logger(), + "Autonomy not allowed: %s", reason.c_str()); + return BT::NodeStatus::FAILURE; + } +} + +} // namespace sentor_guard diff --git a/src/sentor_guard/src/guard.cpp b/src/sentor_guard/src/guard.cpp new file mode 100644 index 0000000..a3c89f1 --- /dev/null +++ b/src/sentor_guard/src/guard.cpp @@ -0,0 +1,271 @@ +#include "sentor_guard/guard.hpp" +#include "sentor_msgs/msg/guard_status.hpp" +#include + +namespace sentor_guard { + +SentorGuard::SentorGuard(rclcpp::Node::SharedPtr node) + : SentorGuard(node, Options()) { +} + +SentorGuard::SentorGuard(rclcpp::Node::SharedPtr node, const Options& options) + : node_(node), options_(options), + last_state_time_(node->get_clock()->now()), + last_mode_time_(node->get_clock()->now()) { + + // Create subscriptions + state_sub_ = node_->create_subscription( + options_.state_topic, 10, + std::bind(&SentorGuard::stateCallback, this, std::placeholders::_1)); + + mode_sub_ = node_->create_subscription( + options_.mode_topic, 10, + std::bind(&SentorGuard::modeCallback, this, std::placeholders::_1)); + + // Create publisher for blocking status + status_publisher_ = node_->create_publisher( + "/sentor_guard/blocking_reason", 10); + + RCLCPP_INFO(node_->get_logger(), + "SentorGuard initialized: required_state='%s', update_timeout=%ldms", + options_.required_state.c_str(), options_.update_timeout.count()); +} + +void SentorGuard::stateCallback(const std_msgs::msg::String::SharedPtr msg) { + std::lock_guard lock(mutex_); + current_state_ = msg->data; + last_state_time_ = node_->get_clock()->now(); + checkConditions(); +} + +void SentorGuard::modeCallback(const std_msgs::msg::Bool::SharedPtr msg) { + std::lock_guard lock(mutex_); + autonomous_mode_ = msg->data; + last_mode_time_ = node_->get_clock()->now(); + checkConditions(); +} + +void SentorGuard::checkConditions() { + auto now = node_->get_clock()->now(); + + // Check if we have received state message + if (current_state_.empty()) { + condition_met_ = false; + return; + } + + // Check if state message is recent + auto state_age = now - last_state_time_; + if (state_age > rclcpp::Duration(options_.update_timeout)) { + condition_met_ = false; + return; + } + + // Check state value + if (current_state_ != options_.required_state) { + condition_met_ = false; + return; + } + + // Check if mode message is recent + auto mode_age = now - last_mode_time_; + if (mode_age > rclcpp::Duration(options_.update_timeout)) { + condition_met_ = false; + return; + } + + // Check autonomous mode + if (options_.require_autonomous_mode && !autonomous_mode_) { + condition_met_ = false; + return; + } + + // All conditions met + bool was_met = condition_met_; + condition_met_ = true; + + if (!was_met && condition_met_) { + cv_.notify_all(); + } +} + +bool SentorGuard::isAutonomyAllowed() { + std::lock_guard lock(mutex_); + checkConditions(); // Recheck heartbeat age + return condition_met_; +} + +std::string SentorGuard::getBlockingReason() const { + std::lock_guard lock(mutex_); + auto now = node_->get_clock()->now(); + + // Check state + if (current_state_.empty()) { + return "Robot state not received"; + } + + auto state_age = now - last_state_time_; + if (state_age > rclcpp::Duration(options_.update_timeout)) { + return "Robot state stale (" + std::to_string(state_age.seconds()) + "s old)"; + } + + if (current_state_ != options_.required_state) { + return "State is '" + current_state_ + "', required '" + options_.required_state + "'"; + } + + // Check mode + auto mode_age = now - last_mode_time_; + if (mode_age > rclcpp::Duration(options_.update_timeout)) { + return "Autonomous mode stale (" + std::to_string(mode_age.seconds()) + "s old)"; + } + + if (options_.require_autonomous_mode && !autonomous_mode_) { + return "Autonomous mode is disabled"; + } + + return "Unknown reason"; +} + +std::vector SentorGuard::getTruncatedCallStack(int max_frames) { + std::vector result; + + #ifdef __GNUC__ + void* addresses[max_frames + 5]; // Extra frames to skip + int size = backtrace(addresses, max_frames + 5); + char** symbols = backtrace_symbols(addresses, size); + + // Skip first 3 frames (this function and guard methods) + for (int i = 3; i < std::min(size, max_frames + 3); ++i) { + std::string frame = symbols[i]; + + // Try to demangle C++ symbols + size_t begin = frame.find('('); + size_t end = frame.find('+', begin); + if (begin != std::string::npos && end != std::string::npos) { + std::string mangled = frame.substr(begin + 1, end - begin - 1); + int status; + char* demangled = abi::__cxa_demangle(mangled.c_str(), nullptr, nullptr, &status); + if (status == 0 && demangled) { + result.push_back(demangled); + free(demangled); + } else { + result.push_back(frame); + } + } else { + result.push_back(frame); + } + } + + free(symbols); + #else + result.push_back("Call stack not available on this platform"); + #endif + + return result; +} + +void SentorGuard::publishBlockingStatus(bool is_blocking, const std::vector& call_stack) { + if (!status_publisher_) { + return; + } + + try { + sentor_msgs::msg::GuardStatus msg; + msg.node_name = node_->get_name(); + msg.is_blocking = is_blocking; + + if (is_blocking) { + msg.blocking_reason = getBlockingReason(); + msg.call_stack = call_stack; + auto now = node_->get_clock()->now(); + msg.blocked_at = now; + msg.blocked_duration = 0.0; + } else { + // Guard is passing after being blocked + msg.blocking_reason = ""; + msg.call_stack.clear(); + if (is_currently_blocking_) { + auto now = node_->get_clock()->now(); + auto duration = now - blocking_start_time_; + msg.blocked_duration = duration.seconds(); + msg.blocked_at = blocking_start_time_; + } else { + msg.blocked_duration = 0.0; + msg.blocked_at = rclcpp::Time(0); + } + } + + status_publisher_->publish(msg); + + } catch (const std::exception& e) { + RCLCPP_WARN(node_->get_logger(), "Failed to publish blocking status: %s", e.what()); + } +} + +bool SentorGuard::waitForAutonomy(std::chrono::milliseconds timeout) { + auto start = std::chrono::steady_clock::now(); + bool first_block = true; + + while (rclcpp::ok()) { + { + std::unique_lock lock(mutex_); + checkConditions(); + + if (condition_met_) { + // If we were previously blocking, publish that we're now passing + if (is_currently_blocking_) { + publishBlockingStatus(false); + is_currently_blocking_ = false; + blocking_call_stack_.clear(); + } + return true; + } + + // We're blocking - publish status on first block + if (first_block) { + first_block = false; + is_currently_blocking_ = true; + blocking_start_time_ = node_->get_clock()->now(); + blocking_call_stack_ = getTruncatedCallStack(); + publishBlockingStatus(true, blocking_call_stack_); + } + + if (timeout.count() > 0) { + auto elapsed = std::chrono::steady_clock::now() - start; + auto remaining = timeout - std::chrono::duration_cast(elapsed); + + if (remaining.count() <= 0) { + auto reason = getBlockingReason(); + RCLCPP_WARN(node_->get_logger(), + "Autonomy not granted within %ldms: %s", + timeout.count(), reason.c_str()); + return false; + } + + cv_.wait_for(lock, std::chrono::milliseconds(100)); + } else { + cv_.wait_for(lock, std::chrono::milliseconds(100)); + } + } + + // Spin once to process callbacks + rclcpp::spin_some(node_); + } + + return false; +} + +void SentorGuard::guardedWait(std::chrono::milliseconds timeout) { + if (!waitForAutonomy(timeout)) { + auto reason = getBlockingReason(); + if (timeout.count() > 0) { + throw AutonomyGuardException( + "Autonomy not granted within " + std::to_string(timeout.count()) + + "ms timeout: " + reason); + } else { + throw AutonomyGuardException("Autonomy not allowed: " + reason); + } + } +} + +} // namespace sentor_guard diff --git a/src/sentor_guard/src/lifecycle_guard_node.cpp b/src/sentor_guard/src/lifecycle_guard_node.cpp new file mode 100644 index 0000000..a0309d2 --- /dev/null +++ b/src/sentor_guard/src/lifecycle_guard_node.cpp @@ -0,0 +1,103 @@ +#include +#include +#include +#include "sentor_guard/guard.hpp" +#include + +/** + * @brief Lifecycle Guard Node - manages lifecycle state of other nodes based on guard conditions + */ +class LifecycleGuardNode : public rclcpp::Node { +public: + LifecycleGuardNode() : Node("lifecycle_guard_node") { + // Declare parameters + declare_parameter("managed_nodes", std::vector{}); + declare_parameter("check_rate", 10.0); + } + + void initialize() { + auto managed_nodes = get_parameter("managed_nodes").as_string_array(); + + if (managed_nodes.empty()) { + RCLCPP_WARN(get_logger(), "No managed nodes specified"); + } + + // Initialize guard + guard_ = std::make_unique(shared_from_this()); + + // Create service clients for each managed node + for (const auto& node_name : managed_nodes) { + auto client = create_client( + node_name + "/change_state"); + lifecycle_clients_[node_name] = client; + } + + // Create timer to check conditions periodically + double rate = get_parameter("check_rate").as_double(); + timer_ = create_wall_timer( + std::chrono::milliseconds(static_cast(1000.0 / rate)), + std::bind(&LifecycleGuardNode::checkCallback, this)); + + RCLCPP_INFO(get_logger(), "Lifecycle guard managing %zu nodes", managed_nodes.size()); + } + +private: + void checkCallback() { + bool allowed = guard_->isAutonomyAllowed(); + + if (allowed && !currently_active_) { + activateNodes(); + } else if (!allowed && currently_active_) { + deactivateNodes(); + } + } + + void activateNodes() { + RCLCPP_INFO(get_logger(), "Activating managed nodes"); + + for (auto& [node_name, client] : lifecycle_clients_) { + auto request = std::make_shared(); + request->transition.id = lifecycle_msgs::msg::Transition::TRANSITION_ACTIVATE; + + if (client->wait_for_service(std::chrono::seconds(1))) { + auto future = client->async_send_request(request); + // Note: In production, you'd want to wait for the response + } else { + RCLCPP_WARN(get_logger(), "Service not available for %s", node_name.c_str()); + } + } + + currently_active_ = true; + } + + void deactivateNodes() { + RCLCPP_INFO(get_logger(), "Deactivating managed nodes: %s", + guard_->getBlockingReason().c_str()); + + for (auto& [node_name, client] : lifecycle_clients_) { + auto request = std::make_shared(); + request->transition.id = lifecycle_msgs::msg::Transition::TRANSITION_DEACTIVATE; + + if (client->wait_for_service(std::chrono::seconds(1))) { + auto future = client->async_send_request(request); + // Note: In production, you'd want to wait for the response + } + } + + currently_active_ = false; + } + + std::unique_ptr guard_; + std::map::SharedPtr> lifecycle_clients_; + rclcpp::TimerBase::SharedPtr timer_; + bool currently_active_{false}; +}; + +int main(int argc, char** argv) { + rclcpp::init(argc, argv); + auto node = std::make_shared(); + node->initialize(); + rclcpp::spin(node); + rclcpp::shutdown(); + return 0; +} diff --git a/src/sentor_guard/src/topic_guard_node.cpp b/src/sentor_guard/src/topic_guard_node.cpp new file mode 100644 index 0000000..e251e6a --- /dev/null +++ b/src/sentor_guard/src/topic_guard_node.cpp @@ -0,0 +1,85 @@ +#include +#include +#include +#include "sentor_guard/guard.hpp" + +/** + * @brief Topic Guard Node - forwards messages only when guard conditions are met + * + * This is a simplified prototype that forwards serialized messages. + */ +class TopicGuardNode : public rclcpp::Node { +public: + TopicGuardNode() : Node("topic_guard_node") { + // Declare parameters + declare_parameter("input_topic", ""); + declare_parameter("output_topic", ""); + declare_parameter("message_type", ""); + declare_parameter("required_state", "active"); + declare_parameter("update_timeout", 1.0); + } + + void initialize() { + // Get parameters + std::string input_topic = get_parameter("input_topic").as_string(); + std::string output_topic = get_parameter("output_topic").as_string(); + std::string message_type = get_parameter("message_type").as_string(); + + if (input_topic.empty() || output_topic.empty() || message_type.empty()) { + RCLCPP_ERROR(get_logger(), + "Required parameters: input_topic, output_topic, message_type"); + throw std::runtime_error("Missing required parameters"); + } + + // Initialize guard + sentor_guard::SentorGuard::Options guard_options; + guard_options.required_state = get_parameter("required_state").as_string(); + guard_options.update_timeout = std::chrono::milliseconds( + static_cast(get_parameter("update_timeout").as_double() * 1000)); + + guard_ = std::make_unique(shared_from_this(), guard_options); + + // Create generic publisher and subscriber + publisher_ = create_generic_publisher(output_topic, message_type, 10); + subscriber_ = create_generic_subscription( + input_topic, message_type, 10, + std::bind(&TopicGuardNode::messageCallback, this, std::placeholders::_1)); + + RCLCPP_INFO(get_logger(), + "Topic guard initialized: %s -> %s (type: %s)", + input_topic.c_str(), output_topic.c_str(), message_type.c_str()); + } + +private: + void messageCallback(std::shared_ptr msg) { + if (guard_->isAutonomyAllowed()) { + // Forward message + publisher_->publish(*msg); + messages_forwarded_++; + } else { + // Drop message + messages_dropped_++; + + if (messages_dropped_ % 100 == 0) { + RCLCPP_WARN(get_logger(), + "Dropped %ld messages: %s", + messages_dropped_, guard_->getBlockingReason().c_str()); + } + } + } + + std::unique_ptr guard_; + rclcpp::GenericPublisher::SharedPtr publisher_; + rclcpp::GenericSubscription::SharedPtr subscriber_; + size_t messages_forwarded_{0}; + size_t messages_dropped_{0}; +}; + +int main(int argc, char** argv) { + rclcpp::init(argc, argv); + auto node = std::make_shared(); + node->initialize(); + rclcpp::spin(node); + rclcpp::shutdown(); + return 0; +} diff --git a/src/sentor_guard/test/test_bt_condition_node.cpp b/src/sentor_guard/test/test_bt_condition_node.cpp new file mode 100644 index 0000000..8d0aef6 --- /dev/null +++ b/src/sentor_guard/test/test_bt_condition_node.cpp @@ -0,0 +1,231 @@ +/** + * @file test_bt_condition_node.cpp + * @brief Tests for CheckAutonomyAllowed BT condition node + */ + +#include +#include +#include +#include + +#include "rclcpp/rclcpp.hpp" +#include "std_msgs/msg/string.hpp" +#include "std_msgs/msg/bool.hpp" +#include "behaviortree_cpp/bt_factory.h" +#include "sentor_guard/bt_condition_node.hpp" + +using namespace std::chrono_literals; + +class TestBTConditionNode : public ::testing::Test +{ +protected: + void SetUp() override + { + rclcpp::init(0, nullptr); + test_node_ = std::make_shared("test_bt_node"); + + // Create publishers for test topics + state_pub_ = test_node_->create_publisher("/robot_state", 10); + mode_pub_ = test_node_->create_publisher("/autonomous_mode", 10); + + // Wait for subscriptions to connect + std::this_thread::sleep_for(100ms); + } + + void TearDown() override + { + test_node_.reset(); + rclcpp::shutdown(); + } + + void publishAllConditionsMet() + { + auto state_msg = std_msgs::msg::String(); + state_msg.data = "active"; + state_pub_->publish(state_msg); + + auto mode_msg = std_msgs::msg::Bool(); + mode_msg.data = true; + mode_pub_->publish(mode_msg); + + // Spin to process messages + rclcpp::spin_some(test_node_); + std::this_thread::sleep_for(100ms); + } + + void publishWrongState() + { + auto state_msg = std_msgs::msg::String(); + state_msg.data = "paused"; + state_pub_->publish(state_msg); + + rclcpp::spin_some(test_node_); + std::this_thread::sleep_for(100ms); + } + + rclcpp::Node::SharedPtr test_node_; + rclcpp::Publisher::SharedPtr state_pub_; + rclcpp::Publisher::SharedPtr mode_pub_; +}; + +TEST_F(TestBTConditionNode, FactoryCreation) +{ + BT::BehaviorTreeFactory factory; + + // Register the node + factory.registerBuilder( + "CheckAutonomyAllowed", + [this](const std::string & name, const BT::NodeConfiguration & config) { + return std::make_unique(name, config, test_node_); + }); + + // Verify it can be created + EXPECT_NO_THROW({ + auto tree = factory.createTreeFromText(R"( + + + + + + )"); + }); +} + +TEST_F(TestBTConditionNode, AllConditionsMetReturnsSuccess) +{ + BT::BehaviorTreeFactory factory; + + factory.registerBuilder( + "CheckAutonomyAllowed", + [this](const std::string & name, const BT::NodeConfiguration & config) { + return std::make_unique(name, config, test_node_); + }); + + auto tree = factory.createTreeFromText(R"( + + + + + + )"); + + // Publish all conditions met + publishAllConditionsMet(); + + // Tick the tree + auto status = tree.tickWhileRunning(); + + EXPECT_EQ(status, BT::NodeStatus::SUCCESS); +} + +TEST_F(TestBTConditionNode, WrongStateReturnsFailure) +{ + BT::BehaviorTreeFactory factory; + + factory.registerBuilder( + "CheckAutonomyAllowed", + [this](const std::string & name, const BT::NodeConfiguration & config) { + return std::make_unique(name, config, test_node_); + }); + + auto tree = factory.createTreeFromText(R"( + + + + + + )"); + + // Publish wrong state + publishWrongState(); + + // Tick the tree + auto status = tree.tickWhileRunning(); + + EXPECT_EQ(status, BT::NodeStatus::FAILURE); +} + +TEST_F(TestBTConditionNode, CustomTopicsWork) +{ + BT::BehaviorTreeFactory factory; + + // Create publishers for custom topics + auto custom_state_pub = test_node_->create_publisher( + "/custom/state", 10); + auto custom_mode_pub = test_node_->create_publisher( + "/custom/mode", 10); + + std::this_thread::sleep_for(100ms); + + factory.registerBuilder( + "CheckAutonomyAllowed", + [this](const std::string & name, const BT::NodeConfiguration & config) { + return std::make_unique(name, config, test_node_); + }); + + auto tree = factory.createTreeFromText(R"( + + + + + + )"); + + // Publish on custom topics + auto state_msg = std_msgs::msg::String(); + state_msg.data = "running"; + custom_state_pub->publish(state_msg); + + auto mode_msg = std_msgs::msg::Bool(); + mode_msg.data = true; + custom_mode_pub->publish(mode_msg); + + rclcpp::spin_some(test_node_); + std::this_thread::sleep_for(150ms); + + auto status = tree.tickWhileRunning(); + + EXPECT_EQ(status, BT::NodeStatus::SUCCESS); +} + +TEST_F(TestBTConditionNode, ContinuousCheckingRespondsToChanges) +{ + BT::BehaviorTreeFactory factory; + + factory.registerBuilder( + "CheckAutonomyAllowed", + [this](const std::string & name, const BT::NodeConfiguration & config) { + return std::make_unique(name, config, test_node_); + }); + + auto tree = factory.createTreeFromText(R"( + + + + + + )"); + + // Initially all conditions met + publishAllConditionsMet(); + EXPECT_EQ(tree.tickWhileRunning(), BT::NodeStatus::SUCCESS); + + // Change state to paused + publishWrongState(); + EXPECT_EQ(tree.tickWhileRunning(), BT::NodeStatus::FAILURE); + + // Return to active state + publishAllConditionsMet(); + EXPECT_EQ(tree.tickWhileRunning(), BT::NodeStatus::SUCCESS); +} + +int main(int argc, char ** argv) +{ + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/sentor_guard/test/test_python_guard.py b/src/sentor_guard/test/test_python_guard.py new file mode 100644 index 0000000..b60dad7 --- /dev/null +++ b/src/sentor_guard/test/test_python_guard.py @@ -0,0 +1,210 @@ +"""Tests for Python SentorGuard.""" + +import unittest +import rclpy +from rclpy.node import Node +from std_msgs.msg import String, Bool +from sentor_msgs.msg import GuardStatus +from sentor_guard import SentorGuard, AutonomyGuardException, sentor_guarded +import time + + +class TestSentorGuard(unittest.TestCase): + """Test cases for SentorGuard.""" + + @classmethod + def setUpClass(cls): + """Initialize ROS.""" + rclpy.init() + + @classmethod + def tearDownClass(cls): + """Shutdown ROS.""" + rclpy.shutdown() + + def setUp(self): + """Set up test fixtures.""" + self.node = Node('test_guard_node') + self.guard = SentorGuard(self.node, update_timeout=0.5) + + # Create test publishers + self.state_pub = self.node.create_publisher(String, '/robot_state', 10) + self.mode_pub = self.node.create_publisher(Bool, '/autonomous_mode', 10) + + time.sleep(0.1) # Allow subscriptions to connect + + def tearDown(self): + """Clean up test fixtures.""" + self.node.destroy_node() + + def test_all_conditions_met(self): + """Test that guard allows when all conditions are met.""" + # Publish required state + msg = String() + msg.data = 'active' + self.state_pub.publish(msg) + + # Publish autonomous mode + mode_msg = Bool() + mode_msg.data = True + self.mode_pub.publish(mode_msg) + + # Spin to process messages + for _ in range(3): + rclpy.spin_once(self.node, timeout_sec=0.1) + + # Check that autonomy is allowed + self.assertTrue(self.guard.is_autonomy_allowed()) + + def test_wrong_state_blocks(self): + """Test that wrong state blocks autonomy.""" + msg = String() + msg.data = 'paused' + self.state_pub.publish(msg) + + rclpy.spin_once(self.node, timeout_sec=0.1) + + self.assertFalse(self.guard.is_autonomy_allowed()) + self.assertIn("State is", self.guard.get_blocking_reason()) + + def test_mode_disabled_blocks(self): + """Test that disabled autonomous mode blocks autonomy.""" + # Set valid state + state_msg = String() + state_msg.data = 'active' + self.state_pub.publish(state_msg) + + # Set mode to false + mode_msg = Bool() + mode_msg.data = False + self.mode_pub.publish(mode_msg) + + rclpy.spin_once(self.node, timeout_sec=0.1) + + self.assertFalse(self.guard.is_autonomy_allowed()) + self.assertIn("mode", self.guard.get_blocking_reason().lower()) + + def test_timeout_exception(self): + """Test that timeout raises exception.""" + with self.assertRaises(AutonomyGuardException): + self.guard.guarded_wait(timeout=0.1) + + +class TestSentorGuardDecorator(unittest.TestCase): + """Test cases for @sentor_guarded decorator.""" + + @classmethod + def setUpClass(cls): + """Initialize ROS.""" + if not rclpy.ok(): + rclpy.init() + + def setUp(self): + """Set up test fixtures.""" + self.node = Node('test_decorator_node') + self.guard = SentorGuard(self.node, update_timeout=0.5) + + # Create test publishers + self.state_pub = self.node.create_publisher(String, '/robot_state', 10) + self.mode_pub = self.node.create_publisher(Bool, '/autonomous_mode', 10) + + self.blocking_reason_sub = self.node.create_subscription(GuardStatus, '/sentor_guard/blocking_reason', self.blocking_reason_callback, 10) + time.sleep(0.1) # Allow subscriptions to connect + + def blocking_reason_callback(self, msg): + """Callback to receive blocking reason messages.""" + + print(f"Blocking reason received: {msg}") + + def tearDown(self): + """Clean up test fixtures.""" + self.node.destroy_node() + + def _publish_all_conditions_met(self): + """Helper to publish all conditions as met.""" + state_msg = String() + state_msg.data = 'active' + self.state_pub.publish(state_msg) + + mode_msg = Bool() + mode_msg.data = True + self.mode_pub.publish(mode_msg) + + # Spin to process messages + for _ in range(5): + rclpy.spin_once(self.node, timeout_sec=0.1) + + def test_decorator_with_timeout_blocks(self): + """Test that decorator raises exception on timeout.""" + # Create a test class with guard attribute + class TestClass: + def __init__(self, guard): + self.guard = guard + self.called = False + + @sentor_guarded(timeout=0.1) + def guarded_method(self): + self.called = True + + test_obj = TestClass(self.guard) + + # Should raise exception since conditions are not met + with self.assertRaises(AutonomyGuardException): + test_obj.guarded_method() + + # Method should not have been called + self.assertFalse(test_obj.called) + + def test_decorator_allows_when_conditions_met(self): + """Test that decorator allows execution when conditions are met.""" + # Publish all conditions as met + self._publish_all_conditions_met() + + # Create a test class with guard attribute + class TestClass: + def __init__(self, guard): + self.guard = guard + self.called = False + + @sentor_guarded(timeout=1.0) + def guarded_method(self): + self.called = True + return "success" + + test_obj = TestClass(self.guard) + + # Should execute successfully + result = test_obj.guarded_method() + + # Method should have been called + self.assertTrue(test_obj.called) + self.assertEqual(result, "success") + + def test_decorator_with_explicit_guard(self): + """Test decorator with explicit guard parameter.""" + self._publish_all_conditions_met() + + called = [False] # Use list to capture in closure + + @sentor_guarded(guard=self.guard, timeout=1.0) + def standalone_function(): + called[0] = True + return "executed" + + result = standalone_function() + self.assertTrue(called[0]) + self.assertEqual(result, "executed") + + def test_decorator_without_guard_raises_error(self): + """Test that decorator raises ValueError when no guard available.""" + @sentor_guarded(timeout=0.1) + def no_guard_function(): + pass + + # Should raise ValueError since no guard is available + with self.assertRaises(ValueError): + no_guard_function() + + +if __name__ == '__main__': + unittest.main() diff --git a/src/sentor_msgs/CMakeLists.txt b/src/sentor_msgs/CMakeLists.txt index e5c50d4..a18713e 100644 --- a/src/sentor_msgs/CMakeLists.txt +++ b/src/sentor_msgs/CMakeLists.txt @@ -8,6 +8,7 @@ find_package(std_srvs REQUIRED) rosidl_generate_interfaces(${PROJECT_NAME} "msg/TopicMap.msg" + "msg/GuardStatus.msg" "msg/TopicMapArray.msg" "msg/SentorEvent.msg" "msg/Monitor.msg" diff --git a/src/sentor_msgs/msg/GuardStatus.msg b/src/sentor_msgs/msg/GuardStatus.msg new file mode 100644 index 0000000..9074be7 --- /dev/null +++ b/src/sentor_msgs/msg/GuardStatus.msg @@ -0,0 +1,20 @@ +# Guard blocking/unblocking status message +# Published when a guard blocks and when it unblocks + +# Node name where the guard blocked +string node_name + +# Whether the guard is currently blocking (true) or just passed (false) +bool is_blocking + +# Reason for blocking (empty if not blocking) +string blocking_reason + +# Truncated call stack where the guard blocked (max 10 frames) +string[] call_stack + +# Time when blocking started (seconds since epoch) +builtin_interfaces/Time blocked_at + +# Duration of blocking in seconds (0 if still blocking, >0 if just unblocked) +float64 blocked_duration