diff --git a/client/Dockerfile b/client/Dockerfile index 13e44096f41..66a41882828 100644 --- a/client/Dockerfile +++ b/client/Dockerfile @@ -17,8 +17,8 @@ ENV \ NETBIRD_BIN="/usr/local/bin/netbird" \ NB_LOG_FILE="console,/var/log/netbird/client.log" \ NB_DAEMON_ADDR="unix:///var/run/netbird.sock" \ - NB_ENTRYPOINT_SERVICE_TIMEOUT="5" \ - NB_ENTRYPOINT_LOGIN_TIMEOUT="5" + NB_ENTRYPOINT_SERVICE_TIMEOUT="30" \ + NB_ENTRYPOINT_LOGIN_TIMEOUT="30" ENTRYPOINT [ "/usr/local/bin/netbird-entrypoint.sh" ] diff --git a/client/Dockerfile-rootless b/client/Dockerfile-rootless index 5fa8de0a5f4..b0d59fdf866 100644 --- a/client/Dockerfile-rootless +++ b/client/Dockerfile-rootless @@ -23,8 +23,8 @@ ENV \ NB_DAEMON_ADDR="unix:///var/lib/netbird/netbird.sock" \ NB_LOG_FILE="console,/var/lib/netbird/client.log" \ NB_DISABLE_DNS="true" \ - NB_ENTRYPOINT_SERVICE_TIMEOUT="5" \ - NB_ENTRYPOINT_LOGIN_TIMEOUT="1" + NB_ENTRYPOINT_SERVICE_TIMEOUT="30" \ + NB_ENTRYPOINT_LOGIN_TIMEOUT="30" ENTRYPOINT [ "/usr/local/bin/netbird-entrypoint.sh" ] diff --git a/client/cmd/status.go b/client/cmd/status.go index f09c35c2c31..07dbf9101ab 100644 --- a/client/cmd/status.go +++ b/client/cmd/status.go @@ -28,6 +28,7 @@ var ( ipsFilterMap map[string]struct{} prefixNamesFilterMap map[string]struct{} connectionTypeFilter string + checkFlag string ) var statusCmd = &cobra.Command{ @@ -49,6 +50,7 @@ func init() { statusCmd.PersistentFlags().StringSliceVar(&prefixNamesFilter, "filter-by-names", []string{}, "filters the detailed output by a list of one or more peer FQDN or hostnames, e.g., --filter-by-names peer-a,peer-b.netbird.cloud") statusCmd.PersistentFlags().StringVar(&statusFilter, "filter-by-status", "", "filters the detailed output by connection status(idle|connecting|connected), e.g., --filter-by-status connected") statusCmd.PersistentFlags().StringVar(&connectionTypeFilter, "filter-by-connection-type", "", "filters the detailed output by connection type (P2P|Relayed), e.g., --filter-by-connection-type P2P") + statusCmd.PersistentFlags().StringVar(&checkFlag, "check", "", "run a health check and exit with code 0 on success, 1 on failure (live|ready|startup)") } func statusFunc(cmd *cobra.Command, args []string) error { @@ -56,6 +58,10 @@ func statusFunc(cmd *cobra.Command, args []string) error { cmd.SetOut(cmd.OutOrStdout()) + if checkFlag != "" { + return runHealthCheck(cmd) + } + err := parseFilters() if err != nil { return err @@ -68,15 +74,17 @@ func statusFunc(cmd *cobra.Command, args []string) error { ctx := internal.CtxInitState(cmd.Context()) - resp, err := getStatus(ctx, false) + resp, err := getStatus(ctx, true, false) if err != nil { return err } status := resp.GetStatus() - if status == string(internal.StatusNeedsLogin) || status == string(internal.StatusLoginFailed) || - status == string(internal.StatusSessionExpired) { + needsAuth := status == string(internal.StatusNeedsLogin) || status == string(internal.StatusLoginFailed) || + status == string(internal.StatusSessionExpired) + + if needsAuth && !jsonFlag && !yamlFlag { cmd.Printf("Daemon status: %s\n\n"+ "Run UP command to log in with SSO (interactive login):\n\n"+ " netbird up \n\n"+ @@ -99,7 +107,17 @@ func statusFunc(cmd *cobra.Command, args []string) error { profName = activeProf.Name } - var outputInformationHolder = nbstatus.ConvertToStatusOutputOverview(resp.GetFullStatus(), anonymizeFlag, resp.GetDaemonVersion(), statusFilter, prefixNamesFilter, prefixNamesFilterMap, ipsFilterMap, connectionTypeFilter, profName) + var outputInformationHolder = nbstatus.ConvertToStatusOutputOverview(resp.GetFullStatus(), nbstatus.ConvertOptions{ + Anonymize: anonymizeFlag, + DaemonVersion: resp.GetDaemonVersion(), + DaemonStatus: nbstatus.ParseDaemonStatus(status), + StatusFilter: statusFilter, + PrefixNamesFilter: prefixNamesFilter, + PrefixNamesFilterMap: prefixNamesFilterMap, + IPsFilter: ipsFilterMap, + ConnectionTypeFilter: connectionTypeFilter, + ProfileName: profName, + }) var statusOutputString string switch { case detailFlag: @@ -121,7 +139,7 @@ func statusFunc(cmd *cobra.Command, args []string) error { return nil } -func getStatus(ctx context.Context, shouldRunProbes bool) (*proto.StatusResponse, error) { +func getStatus(ctx context.Context, fullPeerStatus bool, shouldRunProbes bool) (*proto.StatusResponse, error) { conn, err := DialClientGRPCServer(ctx, daemonAddr) if err != nil { //nolint @@ -131,7 +149,7 @@ func getStatus(ctx context.Context, shouldRunProbes bool) (*proto.StatusResponse } defer conn.Close() - resp, err := proto.NewDaemonServiceClient(conn).Status(ctx, &proto.StatusRequest{GetFullPeerStatus: true, ShouldRunProbes: shouldRunProbes}) + resp, err := proto.NewDaemonServiceClient(conn).Status(ctx, &proto.StatusRequest{GetFullPeerStatus: fullPeerStatus, ShouldRunProbes: shouldRunProbes}) if err != nil { return nil, fmt.Errorf("status failed: %v", status.Convert(err).Message()) } @@ -185,6 +203,83 @@ func enableDetailFlagWhenFilterFlag() { } } +func runHealthCheck(cmd *cobra.Command) error { + check := strings.ToLower(checkFlag) + switch check { + case "live", "ready", "startup": + default: + return fmt.Errorf("unknown check %q, must be one of: live, ready, startup", checkFlag) + } + + if err := util.InitLog(logLevel, util.LogConsole); err != nil { + return fmt.Errorf("init log: %w", err) + } + + ctx := internal.CtxInitState(cmd.Context()) + + isStartup := check == "startup" + resp, err := getStatus(ctx, isStartup, isStartup) + if err != nil { + return err + } + + switch check { + case "live": + return nil + case "ready": + return checkReadiness(resp) + case "startup": + return checkStartup(resp) + default: + return nil + } +} + +func checkReadiness(resp *proto.StatusResponse) error { + daemonStatus := internal.StatusType(resp.GetStatus()) + switch daemonStatus { + case internal.StatusIdle, internal.StatusConnecting, internal.StatusConnected: + return nil + case internal.StatusNeedsLogin, internal.StatusLoginFailed, internal.StatusSessionExpired: + return fmt.Errorf("readiness check: daemon status is %s", daemonStatus) + default: + return fmt.Errorf("readiness check: unexpected daemon status %q", daemonStatus) + } +} + +func checkStartup(resp *proto.StatusResponse) error { + fullStatus := resp.GetFullStatus() + if fullStatus == nil { + return fmt.Errorf("startup check: no full status available") + } + + if !fullStatus.GetManagementState().GetConnected() { + return fmt.Errorf("startup check: management not connected") + } + + if !fullStatus.GetSignalState().GetConnected() { + return fmt.Errorf("startup check: signal not connected") + } + + var relayCount, relaysConnected int + for _, r := range fullStatus.GetRelays() { + uri := r.GetURI() + if !strings.HasPrefix(uri, "rel://") && !strings.HasPrefix(uri, "rels://") { + continue + } + relayCount++ + if r.GetAvailable() { + relaysConnected++ + } + } + + if relayCount > 0 && relaysConnected == 0 { + return fmt.Errorf("startup check: no relay servers available (0/%d connected)", relayCount) + } + + return nil +} + func parseInterfaceIP(interfaceIP string) string { ip, _, err := net.ParseCIDR(interfaceIP) if err != nil { diff --git a/client/internal/debug/debug.go b/client/internal/debug/debug.go index 0f8243e7a87..395b99d557a 100644 --- a/client/internal/debug/debug.go +++ b/client/internal/debug/debug.go @@ -31,7 +31,6 @@ import ( nbstatus "github.com/netbirdio/netbird/client/status" mgmProto "github.com/netbirdio/netbird/shared/management/proto" "github.com/netbirdio/netbird/util" - "github.com/netbirdio/netbird/version" ) const readmeContent = `Netbird debug bundle @@ -418,7 +417,10 @@ func (g *BundleGenerator) addStatus() error { fullStatus := g.statusRecorder.GetFullStatus() protoFullStatus := nbstatus.ToProtoFullStatus(fullStatus) protoFullStatus.Events = g.statusRecorder.GetEventHistory() - overview := nbstatus.ConvertToStatusOutputOverview(protoFullStatus, g.anonymize, version.NetbirdVersion(), "", nil, nil, nil, "", profName) + overview := nbstatus.ConvertToStatusOutputOverview(protoFullStatus, nbstatus.ConvertOptions{ + Anonymize: g.anonymize, + ProfileName: profName, + }) statusOutput := overview.FullDetailSummary() statusReader := strings.NewReader(statusOutput) diff --git a/client/netbird-entrypoint.sh b/client/netbird-entrypoint.sh index 7c9fa021a1d..f8267d6ee27 100755 --- a/client/netbird-entrypoint.sh +++ b/client/netbird-entrypoint.sh @@ -1,12 +1,11 @@ #!/usr/bin/env bash set -eEuo pipefail -: ${NB_ENTRYPOINT_SERVICE_TIMEOUT:="5"} -: ${NB_ENTRYPOINT_LOGIN_TIMEOUT:="5"} +: ${NB_ENTRYPOINT_SERVICE_TIMEOUT:="30"} +: ${NB_ENTRYPOINT_LOGIN_TIMEOUT:="30"} NETBIRD_BIN="${NETBIRD_BIN:-"netbird"}" export NB_LOG_FILE="${NB_LOG_FILE:-"console,/var/log/netbird/client.log"}" service_pids=() -log_file_path="" _log() { # mimic Go logger's output for easier parsing @@ -33,60 +32,50 @@ on_exit() { fi } -wait_for_message() { - local timeout="${1}" message="${2}" - if test "${timeout}" -eq 0; then - info "not waiting for log line ${message@Q} due to zero timeout." - elif test -n "${log_file_path}"; then - info "waiting for log line ${message@Q} for ${timeout} seconds..." - grep -E -q "${message}" <(timeout "${timeout}" tail -F "${log_file_path}" 2>/dev/null) - else - info "log file unsupported, sleeping for ${timeout} seconds..." - sleep "${timeout}" - fi -} - -locate_log_file() { - local log_files_string="${1}" - - while read -r log_file; do - case "${log_file}" in - console | syslog) ;; - *) - log_file_path="${log_file}" - return - ;; - esac - done < <(sed 's#,#\n#g' <<<"${log_files_string}") - - warn "log files parsing for ${log_files_string@Q} is not supported by debug bundles" - warn "please consider removing the \$NB_LOG_FILE or setting it to real file, before gathering debug bundles." -} - wait_for_daemon_startup() { local timeout="${1}" + if [[ "${timeout}" -eq 0 ]]; then + info "not waiting for daemon startup due to zero timeout." + return + fi - if test -n "${log_file_path}"; then - if ! wait_for_message "${timeout}" "started daemon server"; then - warn "log line containing 'started daemon server' not found after ${timeout} seconds" - warn "daemon failed to start, exiting..." - exit 1 + local deadline=$((SECONDS + timeout)) + while [[ "${SECONDS}" -lt "${deadline}" ]]; do + if "${NETBIRD_BIN}" status --check live 2>/dev/null; then + return fi - else - warn "daemon service startup not discovered, sleeping ${timeout} instead" - sleep "${timeout}" - fi + sleep 1 + done + + warn "daemon did not become responsive after ${timeout} seconds, exiting..." + exit 1 } login_if_needed() { local timeout="${1}" - if test -n "${log_file_path}" && wait_for_message "${timeout}" 'peer has been successfully registered|management connection state READY'; then + if "${NETBIRD_BIN}" status --check ready 2>/dev/null; then info "already logged in, skipping 'netbird up'..." - else + return + fi + + if [[ "${timeout}" -eq 0 ]]; then info "logging in..." "${NETBIRD_BIN}" up + return fi + + local deadline=$((SECONDS + timeout)) + while [[ "${SECONDS}" -lt "${deadline}" ]]; do + if "${NETBIRD_BIN}" status --check ready 2>/dev/null; then + info "already logged in, skipping 'netbird up'..." + return + fi + sleep 1 + done + + info "logging in..." + "${NETBIRD_BIN}" up } main() { @@ -95,7 +84,6 @@ main() { service_pids+=("$!") info "registered new service process 'netbird service run', currently running: ${service_pids[@]@Q}" - locate_log_file "${NB_LOG_FILE}" wait_for_daemon_startup "${NB_ENTRYPOINT_SERVICE_TIMEOUT}" login_if_needed "${NB_ENTRYPOINT_LOGIN_TIMEOUT}" diff --git a/client/status/status.go b/client/status/status.go index f13163a41d8..8c932bbab29 100644 --- a/client/status/status.go +++ b/client/status/status.go @@ -25,6 +25,38 @@ import ( "github.com/netbirdio/netbird/version" ) +// DaemonStatus represents the current state of the NetBird daemon. +// These values mirror internal.StatusType but are defined here to avoid an import cycle. +type DaemonStatus string + +const ( + DaemonStatusIdle DaemonStatus = "Idle" + DaemonStatusConnecting DaemonStatus = "Connecting" + DaemonStatusConnected DaemonStatus = "Connected" + DaemonStatusNeedsLogin DaemonStatus = "NeedsLogin" + DaemonStatusLoginFailed DaemonStatus = "LoginFailed" + DaemonStatusSessionExpired DaemonStatus = "SessionExpired" +) + +// ParseDaemonStatus converts a raw status string to DaemonStatus. +// Unrecognized values are preserved as-is to remain visible during version skew. +func ParseDaemonStatus(s string) DaemonStatus { + return DaemonStatus(s) +} + +// ConvertOptions holds parameters for ConvertToStatusOutputOverview. +type ConvertOptions struct { + Anonymize bool + DaemonVersion string + DaemonStatus DaemonStatus + StatusFilter string + PrefixNamesFilter []string + PrefixNamesFilterMap map[string]struct{} + IPsFilter map[string]struct{} + ConnectionTypeFilter string + ProfileName string +} + type PeerStateDetailOutput struct { FQDN string `json:"fqdn" yaml:"fqdn"` IP string `json:"netbirdIp" yaml:"netbirdIp"` @@ -102,6 +134,7 @@ type OutputOverview struct { Peers PeersStateOutput `json:"peers" yaml:"peers"` CliVersion string `json:"cliVersion" yaml:"cliVersion"` DaemonVersion string `json:"daemonVersion" yaml:"daemonVersion"` + DaemonStatus DaemonStatus `json:"daemonStatus" yaml:"daemonStatus"` ManagementState ManagementStateOutput `json:"management" yaml:"management"` SignalState SignalStateOutput `json:"signal" yaml:"signal"` Relays RelayStateOutput `json:"relays" yaml:"relays"` @@ -120,7 +153,8 @@ type OutputOverview struct { SSHServerState SSHServerStateOutput `json:"sshServer" yaml:"sshServer"` } -func ConvertToStatusOutputOverview(pbFullStatus *proto.FullStatus, anon bool, daemonVersion string, statusFilter string, prefixNamesFilter []string, prefixNamesFilterMap map[string]struct{}, ipsFilter map[string]struct{}, connectionTypeFilter string, profName string) OutputOverview { +// ConvertToStatusOutputOverview converts protobuf status to the output overview. +func ConvertToStatusOutputOverview(pbFullStatus *proto.FullStatus, opts ConvertOptions) OutputOverview { managementState := pbFullStatus.GetManagementState() managementOverview := ManagementStateOutput{ URL: managementState.GetURL(), @@ -137,12 +171,13 @@ func ConvertToStatusOutputOverview(pbFullStatus *proto.FullStatus, anon bool, da relayOverview := mapRelays(pbFullStatus.GetRelays()) sshServerOverview := mapSSHServer(pbFullStatus.GetSshServerState()) - peersOverview := mapPeers(pbFullStatus.GetPeers(), statusFilter, prefixNamesFilter, prefixNamesFilterMap, ipsFilter, connectionTypeFilter) + peersOverview := mapPeers(pbFullStatus.GetPeers(), opts.StatusFilter, opts.PrefixNamesFilter, opts.PrefixNamesFilterMap, opts.IPsFilter, opts.ConnectionTypeFilter) overview := OutputOverview{ Peers: peersOverview, CliVersion: version.NetbirdVersion(), - DaemonVersion: daemonVersion, + DaemonVersion: opts.DaemonVersion, + DaemonStatus: opts.DaemonStatus, ManagementState: managementOverview, SignalState: signalOverview, Relays: relayOverview, @@ -157,11 +192,11 @@ func ConvertToStatusOutputOverview(pbFullStatus *proto.FullStatus, anon bool, da NSServerGroups: mapNSGroups(pbFullStatus.GetDnsServers()), Events: mapEvents(pbFullStatus.GetEvents()), LazyConnectionEnabled: pbFullStatus.GetLazyConnectionEnabled(), - ProfileName: profName, + ProfileName: opts.ProfileName, SSHServerState: sshServerOverview, } - if anon { + if opts.Anonymize { anonymizer := anonymize.NewAnonymizer(anonymize.DefaultAddresses()) anonymizeOverview(anonymizer, &overview) } diff --git a/client/status/status_test.go b/client/status/status_test.go index b02d78d64ae..7754eebae97 100644 --- a/client/status/status_test.go +++ b/client/status/status_test.go @@ -176,6 +176,7 @@ var overview = OutputOverview{ Events: []SystemEventOutput{}, CliVersion: version.NetbirdVersion(), DaemonVersion: "0.14.1", + DaemonStatus: DaemonStatusConnected, ManagementState: ManagementStateOutput{ URL: "my-awesome-management.com:443", Connected: true, @@ -238,7 +239,10 @@ var overview = OutputOverview{ } func TestConversionFromFullStatusToOutputOverview(t *testing.T) { - convertedResult := ConvertToStatusOutputOverview(resp.GetFullStatus(), false, resp.GetDaemonVersion(), "", nil, nil, nil, "", "") + convertedResult := ConvertToStatusOutputOverview(resp.GetFullStatus(), ConvertOptions{ + DaemonVersion: resp.GetDaemonVersion(), + DaemonStatus: ParseDaemonStatus(resp.GetStatus()), + }) assert.Equal(t, overview, convertedResult) } @@ -329,6 +333,7 @@ func TestParsingToJSON(t *testing.T) { }, "cliVersion": "development", "daemonVersion": "0.14.1", + "daemonStatus": "Connected", "management": { "url": "my-awesome-management.com:443", "connected": true, @@ -452,6 +457,7 @@ func TestParsingToYAML(t *testing.T) { networks: [] cliVersion: development daemonVersion: 0.14.1 +daemonStatus: Connected management: url: my-awesome-management.com:443 connected: true diff --git a/client/wasm/cmd/main.go b/client/wasm/cmd/main.go index 26022ffc7b2..d8e50ab6d21 100644 --- a/client/wasm/cmd/main.go +++ b/client/wasm/cmd/main.go @@ -18,7 +18,6 @@ import ( "github.com/netbirdio/netbird/client/wasm/internal/rdp" "github.com/netbirdio/netbird/client/wasm/internal/ssh" "github.com/netbirdio/netbird/util" - "github.com/netbirdio/netbird/version" ) const ( @@ -350,7 +349,7 @@ func getStatusOverview(client *netbird.Client) (nbstatus.OutputOverview, error) pbFullStatus := fullStatus.ToProto() - return nbstatus.ConvertToStatusOutputOverview(pbFullStatus, false, version.NetbirdVersion(), "", nil, nil, nil, "", ""), nil + return nbstatus.ConvertToStatusOutputOverview(pbFullStatus, nbstatus.ConvertOptions{}), nil } // createStatusMethod creates the status method that returns JSON diff --git a/proxy/internal/debug/handler.go b/proxy/internal/debug/handler.go index ab75c8b727d..2b6171afb3d 100644 --- a/proxy/internal/debug/handler.go +++ b/proxy/internal/debug/handler.go @@ -409,17 +409,13 @@ func (h *Handler) handleClientStatus(w http.ResponseWriter, r *http.Request, acc } pbStatus := nbstatus.ToProtoFullStatus(fullStatus) - overview := nbstatus.ConvertToStatusOutputOverview( - pbStatus, - false, - version.NetbirdVersion(), - statusFilter, - prefixNamesFilter, - prefixNamesFilterMap, - ipsFilterMap, - connectionTypeFilter, - "", - ) + overview := nbstatus.ConvertToStatusOutputOverview(pbStatus, nbstatus.ConvertOptions{ + StatusFilter: statusFilter, + PrefixNamesFilter: prefixNamesFilter, + PrefixNamesFilterMap: prefixNamesFilterMap, + IPsFilter: ipsFilterMap, + ConnectionTypeFilter: connectionTypeFilter, + }) if wantJSON { h.writeJSON(w, map[string]interface{}{