From 927af54ac8db6b5224dedc76e946ea8026d9fa2e Mon Sep 17 00:00:00 2001
From: Kapten boneng <iwak01bogo@gmail.com>
Date: Fri, 6 Feb 2026 01:06:33 +0700
Subject: [PATCH 01/17] Add Telegram alert script for notifications

---
 health/alert_telegram.sh | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 health/alert_telegram.sh

diff --git a/health/alert_telegram.sh b/health/alert_telegram.sh
new file mode 100644
index 00000000..1743d99d
--- /dev/null
+++ b/health/alert_telegram.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+#
+# Send a Pi Node alert to a Telegram chat through the Bot API sendMessage call.
+#
+# Usage:   alert_telegram.sh "<message>"
+# Config:  BOT_TOKEN and CHAT_ID may be supplied through the environment;
+#          otherwise the placeholders below must be edited before use.
+# Returns: curl's exit status (non-zero on transport or HTTP errors).
+
+BOT_TOKEN="${BOT_TOKEN:-ISI_DENGAN_BOT_TOKEN_KAMU}"
+CHAT_ID="${CHAT_ID:-ISI_DENGAN_CHAT_ID_KAMU}"
+
+MSG="${1:-}"
+
+# $'\n' is a real newline; "\n" inside double quotes is sent as a literal
+# backslash followed by "n".
+TEXT="🚨 [Pi Node Alert]"$'\n'"${MSG}"
+
+# --data-urlencode keeps '&', '+', '%' and newlines in the message intact.
+# parse_mode is deliberately not sent: the message is plain text, and with
+# HTML parsing a stray '<' can make Telegram reject the request.
+# -f maps HTTP errors to a non-zero exit; --max-time bounds a hung request so
+# a monitoring loop calling this script cannot stall on Telegram.
+curl -fsS --max-time 10 -X POST \
+  "https://api.telegram.org/bot${BOT_TOKEN}/sendMessage" \
+  --data-urlencode "chat_id=${CHAT_ID}" \
+  --data-urlencode "text=${TEXT}"

From 013aaeb204d13f0174046740d993f9fae8c5c1d6 Mon Sep 17 00:00:00 2001
From: Kapten boneng <iwak01bogo@gmail.com>
Date: Fri, 6 Feb 2026 01:07:08 +0700
Subject: [PATCH 02/17] Add auto_recover.sh for health monitoring

Implement an auto-recovery script that checks node health and restarts services if unhealthy.
---
 health/health/auto_recover.sh | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 health/health/auto_recover.sh

diff --git a/health/health/auto_recover.sh b/health/health/auto_recover.sh
new file mode 100644
index 00000000..f6d66ea0
--- /dev/null
+++ b/health/health/auto_recover.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+# Watchdog: probe the node once a minute; on failure, alert and restart.
+while :; do
+  if ! /health/healthcheck.sh; then
+    notice="Node unhealthy. Restarting services..."
+    printf '[AUTO] %s\n' "$notice"
+    /health/alert_telegram.sh "$notice"
+    for svc in stellar-core horizon; do
+      supervisorctl restart "$svc"
+    done
+  fi
+  sleep 60
+done

From bce730e3cd3f50f7bf9cdfe69b6fe77550c58809 Mon Sep 17 00:00:00 2001
From: Kapten boneng <iwak01bogo@gmail.com>
Date: Fri, 6 Feb 2026 01:08:27 +0700
Subject: [PATCH 03/17] Add supervisord configuration for auto_recover program

---
 health/health/supervisord.conf | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 health/health/supervisord.conf

diff --git a/health/health/supervisord.conf b/health/health/supervisord.conf
new file mode 100644
index 00000000..8b8ee728
--- /dev/null
+++ b/health/health/supervisord.conf
@@ -0,0 +1,6 @@
+[program:auto_recover]
+command=/health/auto_recover.sh ; watchdog loop script run under supervisord
+autostart=true ; launch together with supervisord
+autorestart=true ; relaunch the script whenever it exits
+stderr_logfile=/var/log/supervisor/auto_recover.err.log ; receives the script's stderr
+stdout_logfile=/var/log/supervisor/auto_recover.out.log ; receives the script's stdout

From 3e4469f935f42509909106ffc60325f46ab42f43 Mon Sep 17 00:00:00 2001
From: Kapten boneng <iwak01bogo@gmail.com>
Date: Fri, 6 Feb 2026 01:09:12 +0700
Subject: [PATCH 04/17] Create node_metrics.sh for service health checks

Add a script to check the status of Horizon and Stellar-core services.
---
 health/health/metrics/node_metrics.sh | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 health/health/metrics/node_metrics.sh

diff --git a/health/health/metrics/node_metrics.sh b/health/health/metrics/node_metrics.sh
new file mode 100644
index 00000000..a1b08614
--- /dev/null
+++ b/health/health/metrics/node_metrics.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+#
+# Print Prometheus text-format gauges describing whether the local Horizon
+# and stellar-core HTTP endpoints answer successfully (1) or not (0).
+
+# Upper bound, in seconds, for each probe. Without it a service that accepts
+# the connection but never replies would block this script indefinitely.
+PROBE_TIMEOUT="${PROBE_TIMEOUT:-5}"
+
+#######################################
+# Probe an HTTP endpoint.
+# Arguments: $1 - URL to request
+# Outputs:   "1" if curl succeeds (-f: HTTP errors count as failure), else "0"
+#######################################
+probe() {
+  if curl -sf --max-time "$PROBE_TIMEOUT" "$1" >/dev/null; then
+    echo 1
+  else
+    echo 0
+  fi
+}
+
+HORIZON_OK=$(probe http://localhost:8000/)
+CORE_OK=$(probe http://localhost:11626/info)
+
+echo "# HELP pi_node_horizon_up Horizon service status"
+echo "# TYPE pi_node_horizon_up gauge"
+echo "pi_node_horizon_up $HORIZON_OK"
+
+echo "# HELP pi_node_core_up Stellar-core service status"
+echo "# TYPE pi_node_core_up gauge"
+echo "pi_node_core_up $CORE_OK"

From 36117a9e9e77c5e58a5bbe567189bdd774650e77 Mon Sep 17 00:00:00 2001
From: Kapten boneng <iwak01bogo@gmail.com>
Date: Fri, 6 Feb 2026 01:09:47 +0700
Subject: [PATCH 05/17] Add metrics server script to serve metrics

---
 health/health/metrics/metrics/metrics_server.sh | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 health/health/metrics/metrics/metrics_server.sh

diff --git a/health/health/metrics/metrics/metrics_server.sh b/health/health/metrics/metrics/metrics_server.sh
new file mode 100644
index 00000000..033fadfb
--- /dev/null
+++ b/health/health/metrics/metrics/metrics_server.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+#
+# Minimal HTTP responder for Prometheus: answers each connection on $PORT
+# with the output of /metrics/node_metrics.sh, one connection per iteration.
+#
+# NOTE(review): both sides of the pipeline start together, so the metrics are
+# collected when nc starts listening, not when a client connects; a scrape
+# therefore receives values gathered right after the previous connection.
+
+PORT=9105
+
+while true; do
+  # `|| sleep 1` stops the loop from spinning if nc exits with an error.
+  {
+    # printf ends the headers with an exact CRLF blank line; `echo -e`
+    # appended a bare LF, which left the header block ending in "\r\n\n".
+    printf 'HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\nConnection: close\r\n\r\n'
+    /metrics/node_metrics.sh
+  } | nc -l -p "$PORT" -q 1 || sleep 1
+done

From cb6eb368abe73dd1455ce9a7cd3dd0c978eef313 Mon Sep 17 00:00:00 2001
From: Kapten boneng <iwak01bogo@gmail.com>
Date: Fri, 6 Feb 2026 01:10:22 +0700
Subject: [PATCH 06/17] Add supervisord configuration for metrics server

---
 health/health/metrics/metrics/supervisord.conf | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 health/health/metrics/metrics/supervisord.conf

diff --git a/health/health/metrics/metrics/supervisord.conf b/health/health/metrics/metrics/supervisord.conf
new file mode 100644
index 00000000..9e596f02
--- /dev/null
+++ b/health/health/metrics/metrics/supervisord.conf
@@ -0,0 +1,6 @@
+[program:metrics_server]
+command=/metrics/metrics_server.sh ; metrics responder loop run under supervisord
+autostart=true ; launch together with supervisord
+autorestart=true ; relaunch the script whenever it exits
+stderr_logfile=/var/log/supervisor/metrics.err.log ; receives the script's stderr
+stdout_logfile=/var/log/supervisor/metrics.out.log ; receives the script's stdout

From fd7e2ea21713b2f459218fc34cb65e2031bcc022 Mon Sep 17 00:00:00 2001
From: Kapten boneng <iwak01bogo@gmail.com>
Date: Fri, 6 Feb 2026 01:11:02 +0700
Subject: [PATCH 07/17] Add Prometheus configuration for pi-node monitoring

---
 health/health/metrics/metrics/monitoring/prometheus.yml | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 health/health/metrics/metrics/monitoring/prometheus.yml

diff --git a/health/health/metrics/metrics/monitoring/prometheus.yml b/health/health/metrics/metrics/monitoring/prometheus.yml
new file mode 100644
index 00000000..1f00d8a9
--- /dev/null
+++ b/health/health/metrics/metrics/monitoring/prometheus.yml
@@ -0,0 +1,7 @@
+global:
+  scrape_interval: 15s  # default interval between scrapes for every job
+
+scrape_configs:
+  - job_name: "pi-node"  # attached to scraped series as the job label
+    static_configs:
+      - targets: ["pi-node:9105"]  # assumes "pi-node" resolves from the Prometheus container and serves metrics on 9105 - TODO confirm shared network

From cc2f2aa73bff414790e1b5fb2a9fc82ee84df4fa Mon Sep 17 00:00:00 2001
From: Kapten boneng <iwak01bogo@gmail.com>
Date: Fri, 6 Feb 2026 01:12:30 +0700
Subject: [PATCH 08/17] Add Docker Compose configuration for monitoring
 services

---
 docker-compose.monitoring.yml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 docker-compose.monitoring.yml

diff --git a/docker-compose.monitoring.yml b/docker-compose.monitoring.yml
new file mode 100644
index 00000000..0f1be202
--- /dev/null
+++ b/docker-compose.monitoring.yml
@@ -0,0 +1,21 @@
+version: "3.9"
+
+# Monitoring stack: Prometheus (scrapes the node) and Grafana (dashboards).
+services:
+  prometheus:
+    image: prom/prometheus
+    container_name: pi-prometheus
+    volumes:
+      # Path is relative to this compose file. The scrape config is committed
+      # under health/health/metrics/metrics/monitoring/, not ./monitoring/;
+      # a missing source path makes Docker create an empty directory instead.
+      # Mounted read-only because Prometheus only needs to read it.
+      - ./health/health/metrics/metrics/monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
+    ports:
+      - "9090:9090"
+
+  grafana:
+    image: grafana/grafana
+    container_name: pi-grafana
+    ports:
+      - "3000:3000"

From 2b9f339ec73f96a0ef9c07622d38525d8d6773f1 Mon Sep 17 00:00:00 2001
From: Kapten boneng <iwak01bogo@gmail.com>
Date: Fri, 6 Feb 2026 01:18:32 +0700
Subject: [PATCH 09/17] Add healthcheck script for Horizon and stellar-core

This script checks the health of the Horizon and stellar-core services by making HTTP requests and reporting their status.
---
 health/healthcheck.sh | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 health/healthcheck.sh

diff --git a/health/healthcheck.sh b/health/healthcheck.sh
new file mode 100644
index 00000000..49b9680a
--- /dev/null
+++ b/health/healthcheck.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+#
+# Health probe for the Pi Node.
+# Exits 0 when both Horizon and stellar-core answer over HTTP, 1 otherwise,
+# and prints one "[HEALTH] ..." status line to stdout.
+set -e
+
+HORIZON_URL="http://localhost:8000/"
+CORE_INFO_URL="http://localhost:11626/info"
+# Per-request limit in seconds. Without one, curl can wait indefinitely on a
+# service that accepts the connection but never replies, which would stall
+# any loop that runs this script.
+CURL_TIMEOUT="${CURL_TIMEOUT:-5}"
+
+# Check Horizon
+if ! curl -sf --max-time "$CURL_TIMEOUT" "$HORIZON_URL" > /dev/null; then
+  echo "[HEALTH] Horizon is DOWN"
+  exit 1
+fi
+
+# Check stellar-core
+if ! curl -sf --max-time "$CURL_TIMEOUT" "$CORE_INFO_URL" > /dev/null; then
+  echo "[HEALTH] stellar-core is DOWN"
+  exit 1
+fi
+
+echo "[HEALTH] Pi Node is HEALTHY"
+exit 0

From 89e8c4e973bf15a3c348c0621547bdc4d89a3b5d Mon Sep 17 00:00:00 2001
From: Kapten boneng <iwak01bogo@gmail.com>
Date: Fri, 6 Feb 2026 01:20:14 +0700
Subject: [PATCH 10/17] Add health check support to Dockerfile

Add health check script and configure Docker healthcheck.
---
 Dockerfile | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index 2b48c5fb..3017cc97 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -38,3 +38,10 @@ ADD start /
 RUN ["chmod", "+x", "start"]
 
 ENTRYPOINT ["/start"]
+# Install the health probe at /health/healthcheck.sh and make it executable
+COPY health/healthcheck.sh /health/healthcheck.sh
+RUN chmod +x /health/healthcheck.sh
+
+# Docker-native health check: after a 60s start period, run the probe every 30s with a 5s timeout; 3 consecutive failures mark the container unhealthy
+HEALTHCHECK --interval=30s --timeout=5s --retries=3 --start-period=60s \
+  CMD /health/healthcheck.sh || exit 1

From d739fba08b534e080543505cdcc36584f833072a Mon Sep 17 00:00:00 2001
From: Kapten boneng <iwak01bogo@gmail.com>
Date: Fri, 6 Feb 2026 01:23:17 +0700
Subject: [PATCH 11/17] Add Docker Compose configuration for mainnet service

---
 docker-compose.yml | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 docker-compose.yml

diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 00000000..55d4dc13
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,13 @@
+services:
+  mainnet:  # compose service name; the container itself is named pi-node below
+    image: pinetwork/pi-node-docker:organization_mainnet-v1.3-p19.6
+    container_name: pi-node
+    restart: unless-stopped  # restart automatically unless stopped explicitly
+    volumes:
+      - ./data/stellar:/opt/stellar  # host directory bound to /opt/stellar
+      - ./data/logs:/var/log/supervisor  # makes the supervisor log directory visible on the host
+    ports:
+      - "31401:8000"  # host 31401 -> container 8000 (the port the health scripts probe as Horizon)
+      - "31402:31402"
+      - "31403:1570"
+    command: ["--mainnet", "--enable-auto-migrations"]  # arguments handed to the image's entrypoint

From 79d056a0c6aed6a83b66f5d9339113a3a4434c94 Mon Sep 17 00:00:00 2001
From: Kapten boneng <iwak01bogo@gmail.com>
Date: Fri, 6 Feb 2026 01:23:55 +0700
Subject: [PATCH 12/17] Add auto-recovery script for service monitoring

This script continuously checks the health of services and restarts them if they are not healthy.
---
 health/auto_recover.sh | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 health/auto_recover.sh

diff --git a/health/auto_recover.sh b/health/auto_recover.sh
new file mode 100644
index 00000000..6265794d
--- /dev/null
+++ b/health/auto_recover.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+# Recovery loop: probe once a minute and restart both services on failure.
+while :; do
+  if ! /health/healthcheck.sh; then
+    printf '%s\n' "[AUTO-RECOVER] Restarting services..."
+    for unit in stellar-core horizon; do
+      supervisorctl restart "$unit"
+    done
+  fi
+  sleep 60
+done

From 25b815e3eb5c88e782835b8610d1e4f6d27d48b2 Mon Sep 17 00:00:00 2001
From: Kapten boneng <iwak01bogo@gmail.com>
Date: Fri, 6 Feb 2026 01:25:51 +0700
Subject: [PATCH 13/17] Add auto_recover program configuration to supervisord

---
 supervisord.conf | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 supervisord.conf

diff --git a/supervisord.conf b/supervisord.conf
new file mode 100644
index 00000000..8b8ee728
--- /dev/null
+++ b/supervisord.conf
@@ -0,0 +1,6 @@
+[program:auto_recover]
+command=/health/auto_recover.sh ; watchdog loop script run under supervisord
+autostart=true ; launch together with supervisord
+autorestart=true ; relaunch the script whenever it exits
+stderr_logfile=/var/log/supervisor/auto_recover.err.log ; receives the script's stderr
+stdout_logfile=/var/log/supervisor/auto_recover.out.log ; receives the script's stdout

From 605486db9b35177d8951e9cab4f8ceea85a06e90 Mon Sep 17 00:00:00 2001
From: Kapten boneng <iwak01bogo@gmail.com>
Date: Fri, 6 Feb 2026 09:33:27 +0700
Subject: [PATCH 14/17] Add healthcheck_v2.sh for service and disk monitoring

Implement a health check script to monitor services and disk space.
---
 health/healthcheck_v2.sh | 47 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 health/healthcheck_v2.sh

diff --git a/health/healthcheck_v2.sh b/health/healthcheck_v2.sh
new file mode 100644
index 00000000..ff39c4a2
--- /dev/null
+++ b/health/healthcheck_v2.sh
@@ -0,0 +1,73 @@
+#!/usr/bin/env bash
+#
+# Extended health check for the Pi Node: Horizon API, stellar-core port,
+# PostgreSQL readiness and free disk space on /.
+# Exits 0 when every check passes and 1 at the first failing check.
+# Each result is printed to stdout and appended to $LOG_FILE.
+set -euo pipefail
+
+LOG_FILE="/var/log/pi-node-health.log"
+MIN_DISK_GB=10
+HORIZON_URL="http://localhost:8000"
+CORE_PORT=11626
+
+# Print a timestamped line and append it to $LOG_FILE.
+log() {
+  # `|| true`: under errexit + pipefail an unwritable log file would otherwise
+  # abort the whole check; tee still echoes the line to stdout in that case.
+  echo "[$(date '+%Y-%m-%d %H:%M:%S')] [HEALTH] $*" | tee -a "$LOG_FILE" || true
+}
+
+#######################################
+# Run a probe command and log the outcome.
+# Arguments: $1   - display name used in the log line
+#            $2.. - probe command and its arguments
+# Returns:   0 if the probe succeeds, 1 otherwise
+#######################################
+check_service() {
+  local name="$1"
+  shift
+  # The probe runs as an argument vector rather than through eval, so values
+  # interpolated into it are never re-parsed as shell syntax.
+  if "$@" >/dev/null 2>&1; then
+    log "OK: $name"
+    return 0
+  else
+    log "FAIL: $name"
+    return 1
+  fi
+}
+
+#######################################
+# Verify that / has at least MIN_DISK_GB gigabytes available.
+# Returns: 0 if enough space is free, 1 if not or if df output is unusable
+#######################################
+check_disk() {
+  local avail
+  avail=$(df -BG / | awk 'NR==2{gsub("G","",$4);print $4}')
+  # Refuse to compare anything that is not a plain integer.
+  if [[ ! "$avail" =~ ^[0-9]+$ ]]; then
+    log "FAIL: Disk space unknown"
+    return 1
+  fi
+  if (( avail < MIN_DISK_GB )); then
+    log "FAIL: Disk space low (${avail}GB)"
+    return 1
+  fi
+  log "OK: Disk space ${avail}GB"
+}
+
+# Run the checks in order and stop at the first failure.
+main() {
+  log "Starting health check..."
+
+  check_service "Horizon API" curl -sf --max-time 5 "${HORIZON_URL}/" || return 1
+  check_service "Stellar-Core Port" nc -z localhost "${CORE_PORT}" || return 1
+  check_service "PostgreSQL" pg_isready || return 1
+  check_disk || return 1
+
+  log "Health check PASSED"
+  return 0
+}
+
+main "$@"

From 71b1e5a5a5e7d643637ab61ef9f0ed9f29f73137 Mon Sep 17 00:00:00 2001
From: Kapten boneng <iwak01bogo@gmail.com>
Date: Fri, 6 Feb 2026 09:35:44 +0700
Subject: [PATCH 15/17] Add healthcheck auto-recovery script

---
 health/health/healthcheck_auto_recover.sh | 46 +++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 health/health/healthcheck_auto_recover.sh

diff --git a/health/health/healthcheck_auto_recover.sh b/health/health/healthcheck_auto_recover.sh
new file mode 100644
index 00000000..fbc443be
--- /dev/null
+++ b/health/health/healthcheck_auto_recover.sh
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+#
+# One-shot recovery: when the health check fails, restart the node container
+# with exponential back-off until it is healthy or MAX_RETRIES is exhausted.
+# Exits 0 when the node is (or becomes) healthy, 1 when recovery fails.
+set -euo pipefail
+
+CHECK_SCRIPT="/opt/stellar/health/healthcheck.sh"
+ALERT_SCRIPT="/opt/stellar/health/alert_manager.sh"
+LOG_FILE="/var/log/pi-node-recover.log"
+# Restart by container name: docker-compose.yml names the service "mainnet"
+# and the container "pi-node", so `docker compose restart pi-node` matches no
+# service and never restarts anything.
+CONTAINER_NAME="pi-node"
+
+MAX_RETRIES=5
+BASE_DELAY=10
+
+# Print a timestamped line and append it to $LOG_FILE.
+log() {
+  # `|| true`: an unwritable log file must not abort recovery under errexit.
+  echo "[$(date '+%Y-%m-%d %H:%M:%S')] [RECOVER] $*" | tee -a "$LOG_FILE" || true
+}
+
+# Forward an alert ($1 text, $2 level); a failing alert channel is logged
+# instead of terminating the recovery under errexit.
+alert() {
+  "$ALERT_SCRIPT" "$@" || log "Alert delivery failed: $1"
+}
+
+#######################################
+# Restart the container up to MAX_RETRIES times, waiting
+# BASE_DELAY * 2^attempt seconds after each restart before re-checking.
+# Returns: 0 once the health check passes, 1 if it never does
+#######################################
+attempt_recover() {
+  local retry=0
+  local delay
+  while (( retry < MAX_RETRIES )); do
+    delay=$(( BASE_DELAY * (2 ** retry) ))
+    log "Health failed. Restarting services... attempt=$((retry+1)) wait=${delay}s"
+    alert "Node unhealthy. Restart attempt $((retry+1))" "warning"
+
+    docker restart "$CONTAINER_NAME" >/dev/null || log "Restart command failed"
+    sleep "$delay"
+
+    # Re-check after every restart, including the last one.
+    if "$CHECK_SCRIPT"; then
+      log "Node healthy again"
+      alert "Node recovered successfully" "info"
+      return 0
+    fi
+
+    # Not ((retry++)): that expression evaluates to 0 on the first pass,
+    # returns status 1, and ends the script under `set -e`.
+    retry=$(( retry + 1 ))
+  done
+
+  log "Max retries reached. Node still unhealthy."
+  alert "CRITICAL: Node recovery failed after $MAX_RETRIES attempts" "critical"
+  return 1
+}
+
+# Entry point: recover only when the initial health check fails.
+main() {
+  if ! "$CHECK_SCRIPT"; then
+    log "Health check failed → starting recovery"
+    attempt_recover
+  fi
+}
+
+main "$@"

From 7104a582549ec19e5ecbc532c578e771365c1a40 Mon Sep 17 00:00:00 2001
From: Kapten boneng <iwak01bogo@gmail.com>
Date: Fri, 6 Feb 2026 09:36:48 +0700
Subject: [PATCH 16/17] Add alert_manager.sh for health alert management

---
 health/health/health/alert_manager.sh | 34 +++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 health/health/health/alert_manager.sh

diff --git a/health/health/health/alert_manager.sh b/health/health/health/alert_manager.sh
new file mode 100644
index 00000000..a0e61d99
--- /dev/null
+++ b/health/health/health/alert_manager.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+#
+# Record an alert in $LOG_FILE and optionally forward it to Telegram.
+#
+# Usage:  alert_manager.sh "<text>" [level]    (level defaults to "info")
+# Config: $CONFIG is sourced; it supplies ENABLE_TELEGRAM and, when that is
+#         "true", TG_BOT_TOKEN and TG_CHAT_ID.
+set -euo pipefail
+
+CONFIG="/opt/stellar/health/alert_config.env"
+LOG_FILE="/var/log/pi-node-alerts.log"
+
+source "$CONFIG"
+
+# Append a timestamped line to $LOG_FILE.
+log() {
+  echo "[$(date '+%Y-%m-%d %H:%M:%S')] [ALERT] $*" >> "$LOG_FILE"
+}
+
+#######################################
+# Post a plain-text message to the configured Telegram chat.
+# Arguments: $1 - message text
+# Returns:   curl's exit status
+#######################################
+send_telegram() {
+  local msg="$1"
+  # --data-urlencode keeps '&', '+' and '%' in the text intact; -f reports
+  # HTTP errors through the exit status; --max-time bounds a hung request.
+  curl -fsS --max-time 10 -X POST \
+    "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
+    --data-urlencode "chat_id=${TG_CHAT_ID}" \
+    --data-urlencode "text=${msg}" >/dev/null
+}
+
+#######################################
+# Log an alert and forward it to Telegram when ENABLE_TELEGRAM is "true".
+# Arguments: $1 - alert text; $2 - level (default "info")
+#######################################
+send_alert() {
+  local text="$1"
+  local level="${2:-info}"
+  local message
+
+  message="[$(hostname)] [$level] $text"
+  log "$message"
+
+  # The default keeps `set -u` from aborting when the config omits the flag.
+  if [[ "${ENABLE_TELEGRAM:-false}" == "true" ]]; then
+    # Delivery is best effort: record the failure rather than exit non-zero
+    # and abort a caller that runs under errexit.
+    send_telegram "$message" || log "Telegram delivery failed"
+  fi
+}
+
+if [[ $# -ge 1 ]]; then
+  send_alert "$1" "${2:-info}"
+fi

From ed241aacab901cc1b2f86a2665ab119842c1e9af Mon Sep 17 00:00:00 2001
From: Kapten boneng <iwak01bogo@gmail.com>
Date: Sun, 8 Feb 2026 11:09:50 +0700
Subject: [PATCH 17/17] Create Grafana dashboard for Pi Node monitoring

Add a new Grafana dashboard for monitoring Pi Node metrics.
---
 grafana-dashboard.json | 65 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100644 grafana-dashboard.json

diff --git a/grafana-dashboard.json b/grafana-dashboard.json
new file mode 100644
index 00000000..4122efa5
--- /dev/null
+++ b/grafana-dashboard.json
@@ -0,0 +1,65 @@
+{
+  "id": null,
+  "uid": "pi-node-enterprise-monitoring",
+  "title": "Pi Node Enterprise Monitoring",
+  "tags": ["pi-network", "node", "enterprise", "monitoring"],
+  "timezone": "browser",
+  "schemaVersion": 38,
+  "version": 1,
+  "refresh": "10s",
+  "panels": [
+    {
+      "type": "stat",
+      "title": "Node Health",
+      "id": 1,
+      "gridPos": { "x": 0, "y": 0, "w": 8, "h": 4 },
+      "targets": [
+        {
+          "expr": "up{job=\"pi-node\"}",
+          "refId": "A"
+        }
+      ],
+      "options": {
+        "reduceOptions": { "calcs": ["last"], "fields": "", "values": false },
+        "orientation": "auto",
+        "colorMode": "background"
+      }
+    },
+    {
+      "type": "timeseries",
+      "title": "CPU Usage",
+      "id": 2,
+      "gridPos": { "x": 8, "y": 0, "w": 8, "h": 6 },
+      "targets": [
+        {
+          "expr": "100 - (avg by (instance)(irate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)",
+          "refId": "A"
+        }
+      ]
+    },
+    {
+      "type": "timeseries",
+      "title": "Memory Usage",
+      "id": 3,
+      "gridPos": { "x": 16, "y": 0, "w": 8, "h": 6 },
+      "targets": [
+        {
+          "expr": "(node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100",
+          "refId": "A"
+        }
+      ]
+    },
+    {
+      "type": "timeseries",
+      "title": "Disk Usage",
+      "id": 4,
+      "gridPos": { "x": 0, "y": 6, "w": 24, "h": 6 },
+      "targets": [
+        {
+          "expr": "100 - (node_filesystem_avail_bytes{mountpoint=\"/\"} / node_filesystem_size_bytes{mountpoint=\"/\"} * 100)",
+          "refId": "A"
+        }
+      ]
+    }
+  ]
+}