#!/bin/bash # S² MCP Bridge External Watchdog # Monitors all workers for heartbeat freshness, triggers alerts on silent agents # # Usage: ./watchdog-monitor.sh DB_PATH="/tmp/claude_bridge_coordinator.db" CHECK_INTERVAL=60 # Check every 60 seconds TIMEOUT_THRESHOLD=300 # Alert if no heartbeat for 5 minutes LOG_FILE="/tmp/mcp-watchdog.log" if [ ! -f "$DB_PATH" ]; then echo "āŒ Database not found: $DB_PATH" | tee -a "$LOG_FILE" echo "šŸ’” Tip: Orchestrator must create conversations first" | tee -a "$LOG_FILE" exit 1 fi echo "šŸ• Starting S² MCP Bridge Watchdog" | tee -a "$LOG_FILE" echo "šŸ“Š Monitoring database: $DB_PATH" | tee -a "$LOG_FILE" echo "ā±ļø Check interval: ${CHECK_INTERVAL}s | Timeout threshold: ${TIMEOUT_THRESHOLD}s" | tee -a "$LOG_FILE" # Find reassignment script SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REASSIGN_SCRIPT="$SCRIPT_DIR/reassign-tasks.py" while true; do TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S') # Query all worker heartbeats SILENT_WORKERS=$(sqlite3 "$DB_PATH" < $TIMEOUT_THRESHOLD ORDER BY seconds_since DESC; EOF ) if [ -n "$SILENT_WORKERS" ]; then echo "[$TIMESTAMP] 🚨 ALERT: Silent workers detected!" | tee -a "$LOG_FILE" echo "$SILENT_WORKERS" | tee -a "$LOG_FILE" # Trigger reassignment protocol if [ -f "$REASSIGN_SCRIPT" ]; then echo "[$TIMESTAMP] šŸ”„ Triggering task reassignment..." | tee -a "$LOG_FILE" python3 "$REASSIGN_SCRIPT" --silent-workers "$SILENT_WORKERS" 2>&1 | tee -a "$LOG_FILE" else echo "[$TIMESTAMP] āš ļø Reassignment script not found: $REASSIGN_SCRIPT" | tee -a "$LOG_FILE" fi else echo "[$TIMESTAMP] āœ… All workers healthy" >> "$LOG_FILE" fi sleep $CHECK_INTERVAL done