diff --git a/README.md b/README.md index 6e2eaa8..85f42ee 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ sudo wget -O /usr/local/bin/yetis https://github.com/glossd/yetis/raw/refs/heads sudo yetis start ``` *Must be `root` to configure `proxy` -Yetis will start in the background. The logs are available at `/tmp/yetis.log`. You can specify your own log directory with `-d` flag. +Yetis will start in the background. You can pass [yetis configuration](#yetis-server-configuration) with `-f` flag. ### Available commands #### Deploy your process: ```shell @@ -114,3 +114,18 @@ Zero downtime is achieved with `RollingUpdate` strategy and `restart` command. Y then direct traffic to the new instance, and only then will terminate the old instance. The new deployment will have the name with an index i.e. frontend-1, frontend-2 and so on. If deployment has `Recreate` strategy, Yetis will wait for the termination of the old instance before starting a new one with the same name. It's the same as in [Kubernetes](https://medium.com/@muppedaanvesh/rolling-update-recreate-deployment-strategies-in-kubernetes-️-327b59f27202) + +### Yetis Server Configuration +Configure Yetis server when starting it: `yetis start -f /path/to/config.yml` +```yaml +logdir: /tmp # yetis.log will be stored in there. Defaults to /tmp +alerting: # Alerts when a managed process fails or recovers. + mail: # add SMPT creds of your smpt server for alerting + host: smtp.host.com + port: 587 + from: noreply@mail.com + to: + - yourmail@mail.com + username: authUser + password: authPass +``` diff --git a/build/yetis b/build/yetis index 6e8e17c..3c4d692 100755 Binary files a/build/yetis and b/build/yetis differ diff --git a/client/commands.go b/client/commands.go index a2be9d1..a66d453 100644 --- a/client/commands.go +++ b/client/commands.go @@ -24,17 +24,36 @@ func init() { fetch.SetBaseURL(baseURL) } -func StartBackground(logdir string) { +type Settings struct { + Alerting +} + +type Alerting interface { +} + +type MailAlerting struct { + Host string + From string + Username string + Password string +} + +func StartBackground(pathToConfig string) { if !unix.ExecutableExists("yetis") { fmt.Println("yetis is not installed") } - logFilePath := filepath.Join(logdir, "yetis.log") + c := common.YetisConfig{}.WithDefaults() + if pathToConfig != "" { + c = common.ReadServerConfig(pathToConfig) + } + + logFilePath := filepath.Join(c.Logdir, "yetis.log") file, err := os.OpenFile(logFilePath, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0750) if err != nil { fmt.Println("Failed to open log file at", logFilePath, err) } - cmd := exec.Command("nohup", "yetis", "run") + cmd := exec.Command("nohup", "yetis", "run", "-f", pathToConfig) cmd.Stdout = file cmd.Stderr = file err = cmd.Start() @@ -66,7 +85,7 @@ func WatchGetDeployments() { } func printDeploymentTable() (int, bool) { - views, err := fetch.Get[[]server.DeploymentView]("/deployments") + views, err := fetch.Get[[]server.DeploymentInfo]("/deployments") if err != nil { fmt.Println(err) return 0, false @@ -133,8 +152,8 @@ func DescribeDeployment(name string) { } } -func GetDeployment(name string) (server.GetResponse, error) { - return fetch.Get[server.GetResponse]("/deployments/" + name) +func GetDeployment(name string) (server.DeploymentFullInfo, error) { + return fetch.Get[server.DeploymentFullInfo]("/deployments/" + name) } func DeleteDeployment(name string) { @@ -172,7 +191,7 @@ func Apply(path string) []error { } func Logs(name string, stream bool) { - r, err := fetch.Get[server.GetResponse]("/deployments/" + name) + r, err := fetch.Get[server.DeploymentFullInfo]("/deployments/" + name) if err != nil { fmt.Println(err) } else { diff --git a/client/commands_test.go b/client/commands_test.go index 5539ff8..e45b4b7 100644 --- a/client/commands_test.go +++ b/client/commands_test.go @@ -8,7 +8,7 @@ import ( ) func TestInfo(t *testing.T) { - go server.Run() + go server.Run("") t.Cleanup(server.Stop) time.Sleep(5 * time.Millisecond) res, err := fetch.Get[server.InfoResponse](baseHost + "/info") diff --git a/common/alert.go b/common/alert.go new file mode 100644 index 0000000..805d0c7 --- /dev/null +++ b/common/alert.go @@ -0,0 +1 @@ +package common diff --git a/common/server_config.go b/common/server_config.go new file mode 100644 index 0000000..b80d0a7 --- /dev/null +++ b/common/server_config.go @@ -0,0 +1,97 @@ +package common + +import ( + "fmt" + "io" + "log" + "net/smtp" + "os" + "reflect" + yaml "sigs.k8s.io/yaml/goyaml.v2" +) + +type YetisConfig struct { + Logdir string + Alerting Alerting +} + +func (yc YetisConfig) WithDefaults() YetisConfig { + if yc.Logdir == "" { + yc.Logdir = "/tmp" + } + return yc +} + +type Alerting struct { + Mail Mail +} + +func (a Alerting) Send(title, description string) error { + var errStr string + if a.Mail.Validate() == nil { + err := a.Mail.Send(title, description) + if err != nil { + errStr += err.Error() + " + " + } + } + if errStr != "" { + return fmt.Errorf("alert failure: %s", errStr) + } + return nil +} + +type Mail struct { + Host string + Port int + From string + To []string + Username string + Password string +} + +func (m Mail) Validate() error { + if m.Host == "" { + return fmt.Errorf("mail: host can't be empty") + } + if m.From == "" { + return fmt.Errorf("mail: from field can't be empty") + } + if len(m.To) == 0 { + return fmt.Errorf("mail: to field can't be empty") + } + return nil +} + +func (m Mail) Send(title, description string) error { + smtpAuth := smtp.PlainAuth("", m.Username, m.Password, m.Host) + address := fmt.Sprintf("%s:%d", m.Host, m.Port) + msg := fmt.Sprintf("From: %s\nSubject: %s\n\n%s", m.From, title, description) + return smtp.SendMail(address, smtpAuth, m.From, m.To, []byte(msg)) +} + +func ReadServerConfig(path string) YetisConfig { + f, err := os.Open(path) + if err != nil { + log.Fatalf("Couldn't open server config: %s", err) + } + return readServerConfig(f).WithDefaults() +} + +func readServerConfig(f io.Reader) YetisConfig { + str, err := io.ReadAll(f) + if err != nil { + log.Fatalf("Couldn't read server config: %s", err) + } + var c YetisConfig + err = yaml.Unmarshal(str, &c) + if err != nil { + log.Fatalf("Failed to unmarshal config: %s", err) + } + if !reflect.ValueOf(c.Alerting.Mail).IsZero() { + err = c.Alerting.Mail.Validate() + if err != nil { + log.Fatalf("Mail validation failed: %s", err) + } + } + return c +} diff --git a/common/server_config_test.go b/common/server_config_test.go new file mode 100644 index 0000000..3865f79 --- /dev/null +++ b/common/server_config_test.go @@ -0,0 +1,36 @@ +package common + +import ( + "bytes" + "testing" +) + +func TestYetisConfig(t *testing.T) { + in := ` +logdir: /tmp # yetis.log will be stored in there. Defaults to /tmp +alerting: + mail: # add SMPT creds of your smpt server for alerting + host: smtp.host.com + port: 587 + from: noreply@mail.com + to: + - yourmail@mail.com + username: authUser + password: authPass +` + + res := readServerConfig(bytes.NewBufferString(in)) + if res.Logdir != "/tmp" || res.Alerting.Mail.Host != "smtp.host.com" || len(res.Alerting.Mail.To) != 1 { + t.Fatal("Wrong config:", res) + } + assert(t, res.Alerting.Mail.Validate(), nil) +} + +func TestSendEmail(t *testing.T) { + t.SkipNow() + + c := ReadServerConfig("../tmp/yetis.yml") + assert(t, c.Alerting.Mail.Validate(), nil) + err := c.Alerting.Mail.Send("Hello Test", "This came from TestSendEmail") + assert(t, err, nil) +} diff --git a/common/unix/commands_test.go b/common/unix/commands_test.go index bf11582..b87ed0a 100644 --- a/common/unix/commands_test.go +++ b/common/unix/commands_test.go @@ -13,7 +13,7 @@ import ( ) func TestIsProcessAlive(t *testing.T) { - cmd := exec.Command("sleep", "0.03") + cmd := exec.Command("sleep", "0.05") err := cmd.Start() if err != nil { t.Fatalf("error launching process: %s", err) @@ -26,7 +26,7 @@ func TestIsProcessAlive(t *testing.T) { t.Fatal("pid shouldn't exist") // probs:) } - time.Sleep(35 * time.Millisecond) + time.Sleep(55 * time.Millisecond) if IsProcessAlive(pid) { t.Fatal("sleep should have terminated") } diff --git a/common/version.go b/common/version.go index fc072b0..372492e 100644 --- a/common/version.go +++ b/common/version.go @@ -1,3 +1,3 @@ package common -const YetisVersion = "v0.3.2" +const YetisVersion = "v0.4.0" diff --git a/itests/iptables_test.go b/itests/iptables_test.go index 0cfda99..c3343c3 100644 --- a/itests/iptables_test.go +++ b/itests/iptables_test.go @@ -17,7 +17,7 @@ import ( func TestProxyUpdatesWhenDeploymentRestartsOnLivenessFailure(t *testing.T) { skipIfNotIptables(t) - go server.Run() + go server.Run("") t.Cleanup(server.Stop) // let the server start time.Sleep(5 * time.Millisecond) @@ -89,7 +89,7 @@ func TestProxyUpdatesWhenDeploymentRestartsOnLivenessFailure(t *testing.T) { func TestRestart_RollingUpdate_ZeroDowntime(t *testing.T) { skipIfNotIptables(t) - go server.Run() + go server.Run("") t.Cleanup(server.Stop) // let the server start time.Sleep(5 * time.Millisecond) @@ -148,7 +148,7 @@ func TestRestart_RollingUpdate_ZeroDowntime(t *testing.T) { func TestDeploymentRestartWithNewYetisPort(t *testing.T) { skipIfNotIptables(t) - go server.Run() + go server.Run("") t.Cleanup(server.Stop) // let the server start time.Sleep(5 * time.Millisecond) diff --git a/itests/server_test.go b/itests/server_test.go index e520619..86d2f9d 100644 --- a/itests/server_test.go +++ b/itests/server_test.go @@ -21,7 +21,7 @@ import ( // server_test.go:42: before first healthcheck: expected Pending status, got Running, restarts 0 func TestLivenessRestart(t *testing.T) { unix.KillByPort(server.YetisServerPort, true) - go server.Run() + go server.Run("") t.Cleanup(server.Stop) // let the server start time.Sleep(time.Millisecond) @@ -31,7 +31,7 @@ func TestLivenessRestart(t *testing.T) { t.Fatalf("apply errors: %v", errs) } - check := func(f func(server.GetResponse)) { + check := func(f func(server.DeploymentFullInfo)) { dr, err := client.GetDeployment("hello") if err != nil { t.Fatal(err) @@ -40,7 +40,7 @@ func TestLivenessRestart(t *testing.T) { } checkSR := func(description string, s server.ProcessStatus, restarts int) { - check(func(r server.GetResponse) { + check(func(r server.DeploymentFullInfo) { if r.Status != s.String() { t.Fatalf("%s: expected %s status, got %s, restarts %d", description, s.String(), r.Status, r.Restarts) } diff --git a/main.go b/main.go index 9e740c1..4a7fafc 100644 --- a/main.go +++ b/main.go @@ -37,7 +37,17 @@ func main() { fmt.Printf("Client: version=%s\n", common.YetisVersion) case "run": // starts Yetis server in the foreground - server.Run() + if len(args) == 2 { + server.Run("") + return + } + + if len(args) != 4 || args[2] != "-f" { + printFlags("run", Flag{Def: "-f FILENAME", Des: "path to the yetis config"}) + return + } + + server.Run(args[3]) case "start": currentUser, err := user.Current() if err != nil { @@ -46,16 +56,16 @@ func main() { if currentUser.Username != "root" { log.Println("Warning: not running as root, Yetis won't be to create a proxy") } - logdir := "/tmp" - if len(args) > 3 { - if args[2] == "-d" { - logdir = args[3] - } else { - printFlags("start", "-d directory for the server log") - return - } + if len(args) == 2 { + client.StartBackground("") + return + } + if len(args) != 4 || args[2] != "-f" { + printFlags("start", Flag{Def: "-f FILENAME", Des: "path to the yetis config"}) + return } - client.StartBackground(logdir) + + client.StartBackground(args[3]) case "shutdown": if len(args) == 2 { client.ShutdownServer(5 * time.Minute) @@ -130,10 +140,17 @@ func main() { } } -func printFlags(cmd string, flags ...string) { +type Flag struct { + // Definition e.g. -f FILENAME + Def string + // Description e.g. path to the config + Des string +} + +func printFlags(cmd string, flags ...Flag) { fmt.Printf("The flags for %s command are:\n", cmd) - for _, flag := range flags { - fmt.Println(" " + flag) + for _, f := range flags { + fmt.Println(" " + f.Def + " " + f.Des) } } @@ -144,7 +161,7 @@ func needName() { func printHelp() { fmt.Printf(`The commands are: Server Commands: - start [-d] start Yetis server + start [-f FILENAME] start Yetis server shutdown terminate Yetis server info print server status Resources Commands: diff --git a/server/alerting.go b/server/alerting.go new file mode 100644 index 0000000..990727f --- /dev/null +++ b/server/alerting.go @@ -0,0 +1,62 @@ +package server + +import ( + "fmt" + "github.com/glossd/fetch" + "github.com/glossd/yetis/common" + "log" + yaml "sigs.k8s.io/yaml/goyaml.v2" +) + +var alertStore = common.Map[string, bool]{} + +func AlertFail(name string) error { + d, ok := getDeployment(name) + if !ok { + err := fmt.Errorf("deployment %s not found", name) + log.Printf("AlertFail skipped: deployment %s not found\n", name) + return err + } + _, loaded := alertStore.LoadOrStore(rootNameForRollingUpdate(name), true) + if loaded { + return fmt.Errorf("alert has already been sent") + } + + info, err := fetch.Marshal(deploymentToInfo(d)) + if err != nil { + log.Printf("AlertFail skipped: marshal: %s\n", err) + return err + } + err = serverConfig.Alerting.Send(fmt.Sprintf("Deployment %s Failed", d.spec.Name), info) + if err != nil { + log.Printf("AlertFail skipped: send: %s", err) + return err + } + return nil +} + +func AlertRecovery(name string) error { + _, loaded := alertStore.LoadAndDelete(rootNameForRollingUpdate(name)) + if !loaded { + err := fmt.Errorf("alert not triggered for %s", name) + log.Printf("AlertRecovery skipped: %s\n", err) + return err + } + d, ok := getDeployment(name) + if !ok { + err := fmt.Errorf("deployment %s not found", name) + log.Printf("AlertRecovery skipped: %s\n", err) + return err + } + info, err := yaml.Marshal(deploymentToInfo(d)) + if err != nil { + log.Printf("AlertRecovery skipped: marshal: %s\n", err) + return err + } + err = serverConfig.Alerting.Send(fmt.Sprintf("Deployment %s Recovered", d.spec.Name), string(info)) + if err != nil { + log.Printf("AlertRecovery skipped: send: %s", err) + return err + } + return nil +} diff --git a/server/alerting_test.go b/server/alerting_test.go new file mode 100644 index 0000000..7a802d8 --- /dev/null +++ b/server/alerting_test.go @@ -0,0 +1,30 @@ +package server + +import ( + "github.com/glossd/yetis/common" + "strings" + "testing" +) + +func TestAlertRecovery_WithoutAlertFail(t *testing.T) { + err := AlertRecovery("hello") + assert(t, true, strings.HasPrefix(err.Error(), "alert not triggered for")) +} + +func TestAlertFailAndRecovery(t *testing.T) { + saveDeployment(common.DeploymentSpec{Name: "hello"}, false) + err := AlertFail("hello") + assert(t, err, nil) + err = AlertRecovery("hello") + assert(t, err, nil) + err = AlertRecovery("hello") + assert(t, true, strings.HasPrefix(err.Error(), "alert not triggered")) +} + +func TestAlertFail_Double(t *testing.T) { + saveDeployment(common.DeploymentSpec{Name: "hello"}, false) + err := AlertFail("hello") + assert(t, err, nil) + err = AlertFail("hello") + assert(t, err.Error(), "alert has already been sent") +} diff --git a/server/handlers_deployment.go b/server/handlers_deployment.go index 8d55275..fe57d58 100644 --- a/server/handlers_deployment.go +++ b/server/handlers_deployment.go @@ -130,7 +130,7 @@ func isYetisPortUsed(c common.DeploymentSpec) bool { return c.YetisPort() == c.LivenessProbe.TcpSocket.Port } -type DeploymentView struct { +type DeploymentInfo struct { Name string Status string Pid int @@ -142,14 +142,14 @@ type DeploymentView struct { PortInfo string } -func ListDeployment() ([]DeploymentView, error) { - var res []DeploymentView +func ListDeployment() ([]DeploymentInfo, error) { + var res []DeploymentInfo rangeDeployments(func(name string, p deployment) { portInfo := strconv.Itoa(p.spec.LivenessProbe.Port()) if p.spec.Proxy.Port > 0 { portInfo = strconv.Itoa(p.spec.Proxy.Port) + " to " + strconv.Itoa(p.spec.LivenessProbe.Port()) } - res = append(res, DeploymentView{ + res = append(res, DeploymentInfo{ Name: name, Status: p.status.String(), Pid: p.pid, @@ -165,8 +165,8 @@ func ListDeployment() ([]DeploymentView, error) { return res, nil } -func sortDeployments(res []DeploymentView) { - slices.SortFunc(res, func(a, b DeploymentView) int { +func sortDeployments(res []DeploymentInfo) { + slices.SortFunc(res, func(a, b DeploymentInfo) int { return cmp.Compare(a.Name, b.Name) }) } @@ -186,7 +186,7 @@ func ageSince(t time.Time) string { return fmt.Sprintf("%ds", int(age.Seconds())) } -type GetResponse struct { +type DeploymentFullInfo struct { Pid int Restarts int Status string @@ -195,7 +195,7 @@ type GetResponse struct { Spec common.DeploymentSpec } -func GetDeployment(r fetch.Request[fetch.Empty]) (*GetResponse, error) { +func GetDeployment(r fetch.Request[fetch.Empty]) (*DeploymentFullInfo, error) { name := r.PathValues["name"] if name == "" { return nil, fmt.Errorf("name can't be empty") @@ -205,14 +205,18 @@ func GetDeployment(r fetch.Request[fetch.Empty]) (*GetResponse, error) { return nil, fmt.Errorf("name '%s' doesn't exist", name) } - return &GetResponse{ + return deploymentToInfo(p), nil +} + +func deploymentToInfo(p deployment) *DeploymentFullInfo { + return &DeploymentFullInfo{ Pid: p.pid, Restarts: p.restarts, Status: p.status.String(), Age: ageSince(p.createdAt), LogPath: p.logPath, Spec: p.spec, - }, nil + } } func DeleteDeployment(r fetch.Request[fetch.Empty]) error { diff --git a/server/handlers_deployment_test.go b/server/handlers_deployment_test.go index cd698a4..f55e078 100644 --- a/server/handlers_deployment_test.go +++ b/server/handlers_deployment_test.go @@ -5,7 +5,7 @@ import ( ) func TestSortDeployments(t *testing.T) { - res := []DeploymentView{{Name: "b"}, {Name: "a"}} + res := []DeploymentInfo{{Name: "b"}, {Name: "a"}} sortDeployments(res) assert(t, res[0].Name, "a") assert(t, res[1].Name, "b") diff --git a/server/liveness.go b/server/liveness.go index e21b8b7..a758906 100644 --- a/server/liveness.go +++ b/server/liveness.go @@ -134,6 +134,7 @@ func heartbeat(deploymentName string, restartsLimit int) heartbeatResult { if p.restarts >= restartsLimit { updateDeploymentStatus(oldSpec.Name, Failed) thresholdMap.Delete(oldSpec.Name) + AlertFail(oldSpec.Name) return tryAgain } log.Printf("Restarting '%s' deployment, failureThreshold was reached\n", oldSpec.Name) @@ -177,6 +178,12 @@ func heartbeat(deploymentName string, restartsLimit int) heartbeatResult { } if tsh.SuccessCount >= dep.spec.LivenessProbe.SuccessThreshold { updateDeploymentStatus(dep.spec.Name, Running) + if dep.status != Running { // if it wasn't already running + // Status could be Pending after Failed. Threshold could be cleaned after Failed. + // It will be triggered at the start of the process for example, but AlertRecovery checks if the process failed before. + // not specifying the right condition will spam logs with "alert was not triggered" + AlertRecovery(dep.spec.Name) + } } return alive } diff --git a/server/server.go b/server/server.go index 63007d6..cff4a84 100644 --- a/server/server.go +++ b/server/server.go @@ -19,9 +19,15 @@ import ( const YetisServerPort = 34711 -func Run() { +var serverConfig = common.YetisConfig{}.WithDefaults() + +func Run(configPath string) { log.SetFlags(log.LstdFlags | log.Lmicroseconds) // adds time to the log + if configPath != "" { + serverConfig = common.ReadServerConfig(configPath) + } + mux := http.NewServeMux() fetch.SetHandlerConfig(fetch.HandlerConfig{