diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml new file mode 100644 index 0000000..a767b1d --- /dev/null +++ b/.github/workflows/build_test.yml @@ -0,0 +1,33 @@ +name: Build and Test + +on: + push: + branches: ["main", "develop"] + pull_request: + branches: ["main", "develop"] + +jobs: + build-and-test: + runs-on: ubuntu-latest + steps: + - name: checkout code + uses: actions/checkout@v4 + + - name: setup Go + uses: actions/setup-go@v5 + with: + go-version: "1.26" + cache: true + cache-dependency-path: go.sum + + - name: install dependencies + run: go mod download + + - name: build + run: | + #test version not in use for release + export VERSION=test-$(git rev-parse --short HEAD) + make build + + - name: test + run: go test -v ./... diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..9cf45b9 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,89 @@ +name: Release + +on: + push: + tags: + - "v*" + +permissions: + contents: read + packages: write + +jobs: + create-oci-image: + runs-on: ubuntu-latest + steps: + - name: checkout code + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v5 + with: + images: ghcr.io/vitistack/gslb-operator + tags: | + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=semver,pattern={{major}} + type=sha + + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set release date + id: date + run: echo "DATE=$(date +%Y-%m-%d)" >> $GITHUB_OUTPUT + + - name: Build and push Docker image + uses: docker/build-push-action@v6 + with: + context: . 
+ push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + build-args: | + VERSION=${{ github.ref_name }} + DATE=${{ steps.date.outputs.DATE }} + platforms: linux/amd64,linux/arm64 + cache-from: type=gha + cache-to: type=gha,mode=max + build-and-publish-helm-chart: + needs: + - create-oci-image + runs-on: ubuntu-latest + steps: + - name: checkout code + uses: actions/checkout@v4 + + - name: Install helm + uses: azure/setup-helm@v1 + with: + version: v3.16.4 + + - name: install-yq + env: + VERSION: v4.44.5 + BINARY: yq_linux_amd64 + run: | + wget https://github.com/mikefarah/yq/releases/download/${VERSION}/${BINARY}.tar.gz -O - | tar xz && mv ${BINARY} yq && chmod +x yq + + - name: build and push chart + env: + VERSION: ${{ github.ref_name }} + run: | + export HELM_VERSION=${VERSION#v*} + ./yq e -i '.version = strenv(HELM_VERSION),.appVersion = strenv(VERSION)' charts/gslb-operator/Chart.yaml + ./yq e -i '.image.tag = strenv(HELM_VERSION),.image.repository = "ncr.sky.nhn.no/ghcr/vitistack/gslb-operator"' charts/gslb-operator/values.yaml + helm package charts/gslb-operator + echo ${{ secrets.GITHUB_TOKEN }} | helm registry login -u ${{ github.actor }} ghcr.io --password-stdin + helm push gslb-operator-${HELM_VERSION}.tgz oci://ghcr.io/vitistack/helm/ diff --git a/.github/workflows/security_scan.yml b/.github/workflows/security_scan.yml index b2c30db..fa9dd2e 100644 --- a/.github/workflows/security_scan.yml +++ b/.github/workflows/security_scan.yml @@ -4,7 +4,7 @@ # You may wish to alter this file to override the set of languages analyzed, # or to provide custom queries or build logic. 
-name: "Security Scan" +name: Security Scan on: push: diff --git a/.gitignore b/.gitignore index 39f3e89..d91ff60 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ *.so *.dylib +/bin + # Test binary, built with `go test -c` *.test @@ -26,6 +28,7 @@ go.work.sum # env file *.env +secrets store.json # Editor/IDE diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..efb0f19 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,33 @@ +FROM golang:1.26 AS build + +LABEL MAINTAINER="espen.wobbes@nhn.no" + +ARG VERSION +ARG DATE + +WORKDIR /app +COPY go.mod go.sum ./ +RUN go mod download + +COPY . . +# build image +RUN CGO_ENABLED=0 go build -ldflags "-s -w -X main.version=${VERSION} -X main.buildDate=${DATE}" -o gslb-operator ./cmd/main.go + + +FROM alpine:3.23 + +WORKDIR /app + +RUN addgroup -S gslb-group && adduser -S gslb-operator -G gslb-group + +COPY --from=build /app/gslb-operator /app/gslb-operator +COPY sandbox.lua /app + +# change ownership of directory +RUN chown -R gslb-operator:gslb-group /app + +# sandbox is read-only +RUN chmod 440 sandbox.lua +USER gslb-operator + +CMD [ "./gslb-operator" ] diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..701ba00 --- /dev/null +++ b/Makefile @@ -0,0 +1,81 @@ +.PHONY: help +help: ## Display this help. + @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) + +# Variables +GO_VERSION := $(shell go version | cut -d' ' -f3) +DOCKER_COMPOSE := docker compose +HELM := helm +KUBECTL := kubectl +DATE := $(shell date +%Y-%m-%d) +VERSION ?= "test" + + +##@ Build +.PHONY: build run +build: check-tools ## Build the Go application. + @echo "Building GSLB - Operator binary..." 
+ @echo "Version: $(VERSION)" + @echo "Date: $(DATE)" + @go build -ldflags "-s -w -X main.version=$(VERSION) -X main.buildDate=$(DATE)" -o ./bin/ ./cmd/main.go + +run: + @echo "Running GSLB - Operator" + @go run -ldflags "-X main.version=0.0.0-test -X main.buildDate=$(DATE)" ./cmd/main.go + +test: ## Run tests + @echo "Running tests..." + @go test -v ./... + @echo "Tests complete!" + +deps: ## Download and verify dependencies + @echo "Downloading dependencies..." + @go mod download + @go mod verify + @go mod tidy + @echo "Dependencies updated!" + +update-deps: ## Update dependencies + @echo "Updating dependencies..." + @go get -u ./... + @go mod tidy + @echo "Dependencies updated!" + +##@ Code Quality +.PHONY: lint format go-security-scan bench +lint: ## Run Go linters + @echo "Running Go linters..." + @command -v golangci-lint >/dev/null 2>&1 || { echo "Installing golangci-lint..."; go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest; } + @golangci-lint run ./... + @echo "Linting complete!" + +format: ## Format Go code + @echo "Formatting Go code..." + @go fmt ./... + @echo "Code formatted!" + +go-security-scan: ## Run security scan + @echo "Running security scan..." + @command -v govulncheck >/dev/null 2>&1 || { echo "Installing govulncheck..."; go install golang.org/x/vuln/cmd/govulncheck@latest; } + @govulncheck ./... + @echo "Security scan complete!" + +bench: ## Run benchmarks + @echo "Running benchmarks..." + @go test -bench=. -benchmem ./... + @echo "Benchmarks complete!" + + +##@ Tools +.PHONY: check-tools install-tools +# Check if required tools are installed +check-tools: + @command -v go >/dev/null 2>&1 || { echo "Go is required but not installed. Aborting." >&2; exit 1; } + @command -v docker >/dev/null 2>&1 || { echo "Docker is required but not installed. Aborting." >&2; exit 1; } + @command -v $(DOCKER_COMPOSE) >/dev/null 2>&1 || { echo "Docker Compose is required but not installed. Aborting." 
>&2; exit 1; } + +install-tools: ## Install development tools + @echo "Installing development tools..." + @go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest + @go install golang.org/x/vuln/cmd/govulncheck@latest; + @echo "Development tools installed!" \ No newline at end of file diff --git a/TODOS.md b/TODOS.md index 9be18b7..6d54a1c 100644 --- a/TODOS.md +++ b/TODOS.md @@ -7,8 +7,11 @@ - flags loader for config variables -- OnShutDown functions to save current state on shutdown +- OnShutDown functions to save current state on shutdown ✅ + - expand to OnStart (unsure if this is necessary if handled correctly when registering services) -- If svc not in DC, then roundtrip decides priority +- AUTH -- AUTH \ No newline at end of file +- Webhooks notifies on event? + +- worker pool stats handling from manager \ No newline at end of file diff --git a/charts/gslb-operator/.helmignore b/charts/gslb-operator/.helmignore new file mode 100644 index 0000000..0e8a0eb --- /dev/null +++ b/charts/gslb-operator/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/gslb-operator/Chart.yaml b/charts/gslb-operator/Chart.yaml new file mode 100644 index 0000000..d659aea --- /dev/null +++ b/charts/gslb-operator/Chart.yaml @@ -0,0 +1,27 @@ +apiVersion: v2 +name: gslb-operator +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. 
They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "1.16.0" + +# Chart icon - displayed in Helm repository UIs like ArtifactHub +icon: https://vitistack.io/images/viti1.svg diff --git a/charts/gslb-operator/templates/NOTES.txt b/charts/gslb-operator/templates/NOTES.txt new file mode 100644 index 0000000..f38c646 --- /dev/null +++ b/charts/gslb-operator/templates/NOTES.txt @@ -0,0 +1,29 @@ +1. Get the application URL by running these commands: +{{- if .Values.httpRoute.enabled }} +{{- if .Values.httpRoute.hostnames }} + export APP_HOSTNAME={{ .Values.httpRoute.hostnames | first }} +{{- else }} + export APP_HOSTNAME=$(kubectl get --namespace {{(first .Values.httpRoute.parentRefs).namespace | default .Release.Namespace }} gateway/{{ (first .Values.httpRoute.parentRefs).name }} -o jsonpath="{.spec.listeners[0].hostname}") + {{- end }} +{{- if and .Values.httpRoute.rules (first .Values.httpRoute.rules).matches (first (first .Values.httpRoute.rules).matches).path.value }} + echo "Visit http://$APP_HOSTNAME{{ (first (first .Values.httpRoute.rules).matches).path.value }} to use your application" + + NOTE: Your HTTPRoute depends on the listener configuration of your gateway and your HTTPRoute rules. 
+ The rules can be set for path, method, header and query parameters. + You can check the gateway configuration with 'kubectl get --namespace {{(first .Values.httpRoute.parentRefs).namespace | default .Release.Namespace }} gateway/{{ (first .Values.httpRoute.parentRefs).name }} -o yaml' +{{- end }} +{{- else if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "gslb-operator.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "gslb-operator.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "gslb-operator.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "gslb-operator.name" . 
}},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + echo "Visit http://127.0.0.1:8080 to use your application" + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT +{{- end }} diff --git a/charts/gslb-operator/templates/_helpers.tpl b/charts/gslb-operator/templates/_helpers.tpl new file mode 100644 index 0000000..8a407f0 --- /dev/null +++ b/charts/gslb-operator/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the gslb-operator. +*/}} +{{- define "gslb-operator.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains gslb-operator name it will be used as a full name. +*/}} +{{- define "gslb-operator.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create gslb-operator name and version as used by the gslb-operator label. +*/}} +{{- define "gslb-operator.gslb-operator" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "gslb-operator.labels" -}} +helm.sh/gslb-operator: {{ include "gslb-operator.gslb-operator" . }} +{{ include "gslb-operator.selectorLabels" . 
}} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "gslb-operator.selectorLabels" -}} +app.kubernetes.io/name: {{ include "gslb-operator.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "gslb-operator.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "gslb-operator.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/charts/gslb-operator/templates/configmap.yaml b/charts/gslb-operator/templates/configmap.yaml new file mode 100644 index 0000000..e052afd --- /dev/null +++ b/charts/gslb-operator/templates/configmap.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: gslb-operator-config + namespace: {{ .Values.namespace }} +data: + SRV_ENV: {{ .Values.settings.env }} + SRV_LUA_SANDBOX: {{ .Values.settings.sandbox }} + API_PORT: {{ .Values.settings.port }} + GSLB_POLL_INTERVAL: {{ .Values.settings.poll_interval }} + GSLB_UPDATER_HOST: {{ .Values.settings.gslb_updater }} diff --git a/charts/gslb-operator/templates/credentials.yaml b/charts/gslb-operator/templates/credentials.yaml new file mode 100644 index 0000000..1b14ecb --- /dev/null +++ b/charts/gslb-operator/templates/credentials.yaml @@ -0,0 +1,31 @@ +{{- if .Values.vault.enable }} +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: gslb-operator-secrets + namespace: {{ .Values.namespace }} +spec: + refreshInterval: 1h + secretStoreRef: + name: lb-openbao + kind: ClusterSecretStore + target: + name: gslb-operator-secrets + data: + - secretKey: JWT_SECRET + remoteRef: + key: /gslb-operator + property: jwt-secret + - secretKey: JWT_USER + remoteRef: + key: /gslb-operator + property: 
jwt-user + - secretKey: GSLB_ZONE + remoteRef: + key: /gslb-operator + property: gslb-zone + - secretKey: GSLB_NAMESERVER + remoteRef: + key: /gslb-operator + property: gslb-nameserver +{{- end }} diff --git a/charts/gslb-operator/templates/deployment.yaml b/charts/gslb-operator/templates/deployment.yaml new file mode 100644 index 0000000..c1290b2 --- /dev/null +++ b/charts/gslb-operator/templates/deployment.yaml @@ -0,0 +1,82 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "gslb-operator.fullname" . }} + namespace: {{ .Values.namespace }} + labels: + {{- include "gslb-operator.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "gslb-operator.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "gslb-operator.labels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "gslb-operator.serviceAccountName" . }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ .Chart.Name }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + envFrom: + - configMapRef: + name: gslb-operator-config + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + {{- with .Values.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.readinessProbe }} + readinessProbe: + {{- toYaml . 
| nindent 12 }} + {{- end }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.volumeMounts }} + volumeMounts: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.volumes }} + volumes: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/charts/gslb-operator/templates/hpa.yaml b/charts/gslb-operator/templates/hpa.yaml new file mode 100644 index 0000000..e261eb6 --- /dev/null +++ b/charts/gslb-operator/templates/hpa.yaml @@ -0,0 +1,32 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "gslb-operator.fullname" . }} + labels: + {{- include "gslb-operator.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "gslb-operator.fullname" . 
}} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/charts/gslb-operator/templates/httproute.yaml b/charts/gslb-operator/templates/httproute.yaml new file mode 100644 index 0000000..afe2371 --- /dev/null +++ b/charts/gslb-operator/templates/httproute.yaml @@ -0,0 +1,38 @@ +{{- if .Values.httpRoute.enabled -}} +{{- $fullName := include "gslb-operator.fullname" . -}} +{{- $svcPort := .Values.service.port -}} +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: {{ $fullName }} + labels: + {{- include "gslb-operator.labels" . | nindent 4 }} + {{- with .Values.httpRoute.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + parentRefs: + {{- with .Values.httpRoute.parentRefs }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.httpRoute.hostnames }} + hostnames: + {{- toYaml . | nindent 4 }} + {{- end }} + rules: + {{- range .Values.httpRoute.rules }} + {{- with .matches }} + - matches: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .filters }} + filters: + {{- toYaml . 
| nindent 8 }} + {{- end }} + backendRefs: + - name: {{ $fullName }} + port: {{ $svcPort }} + weight: 1 + {{- end }} +{{- end }} diff --git a/charts/gslb-operator/templates/rbac/role.yaml b/charts/gslb-operator/templates/rbac/role.yaml new file mode 100644 index 0000000..e3c22c8 --- /dev/null +++ b/charts/gslb-operator/templates/rbac/role.yaml @@ -0,0 +1,9 @@ +{{- if .Values.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "gslb-operator.labels" . | nindent 4 }} + name: gslb-operator-role +rules: [] +{{- end }} \ No newline at end of file diff --git a/charts/gslb-operator/templates/rbac/role_binding.yaml b/charts/gslb-operator/templates/rbac/role_binding.yaml new file mode 100644 index 0000000..67c7b88 --- /dev/null +++ b/charts/gslb-operator/templates/rbac/role_binding.yaml @@ -0,0 +1,16 @@ +{{- if .Values.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + {{- include "gslb-operator.labels" . | nindent 4 }} + name: gslb-operator-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: gslb-operator-role +subjects: +- kind: ServiceAccount + name: {{ .Values.serviceAccount.name }} + namespace: {{ .Values.namespace | default .Release.Namespace }} +{{- end -}} \ No newline at end of file diff --git a/charts/gslb-operator/templates/rbac/service_account.yaml b/charts/gslb-operator/templates/rbac/service_account.yaml new file mode 100644 index 0000000..aaac462 --- /dev/null +++ b/charts/gslb-operator/templates/rbac/service_account.yaml @@ -0,0 +1,15 @@ +{{- if .Values.rbac.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + {{- include "gslb-operator.labels" . 
| nindent 4 }} + {{- if and .Values.serviceAccount .Values.serviceAccount.annotations }} + annotations: + {{- range $key, $value := .Values.serviceAccount.annotations }} + {{ $key }}: {{ $value }} + {{- end }} + {{- end }} + name: {{ .Values.serviceAccount.name }} + namespace: {{ .Values.namespace | default .Release.Namespace }} +{{- end }} \ No newline at end of file diff --git a/charts/gslb-operator/templates/service.yaml b/charts/gslb-operator/templates/service.yaml new file mode 100644 index 0000000..36be3d1 --- /dev/null +++ b/charts/gslb-operator/templates/service.yaml @@ -0,0 +1,17 @@ +{{- if .Values.service.create }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "gslb-operator.fullname" . }} + labels: + {{- include "gslb-operator.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "gslb-operator.selectorLabels" . | nindent 4 }} +{{- end }} diff --git a/charts/gslb-operator/values.yaml b/charts/gslb-operator/values.yaml new file mode 100644 index 0000000..ef4c32a --- /dev/null +++ b/charts/gslb-operator/values.yaml @@ -0,0 +1,168 @@ +# Default values for chart. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/ +replicaCount: 1 + +namespace: "gslb-operator" + +# This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/ +image: + repository: ncr.sky.nhn.no/vitistack/gslb-operator + # This sets the pull policy for images. + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. 
+ tag: "" + +# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ +imagePullSecrets: [] +# This is to override the chart name. +nameOverride: "" +fullnameOverride: "" + +# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/ +serviceAccount: + # Specifies whether a service account should be created + create: true + # Automatically mount a ServiceAccount's API credentials? + automount: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "sa-gslb-operator" + +rbac: + create: true + +# This is for setting Kubernetes Annotations to a Pod. +# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ +podAnnotations: {} +# This is for setting Kubernetes Labels to a Pod. 
+# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ +podLabels: {} + +podSecurityContext: + fsGroup: 2000 + +securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: false + #runAsNonRoot: true + #runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + +# This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/ +service: + create: false + # This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types + type: ClusterIP + # This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports + port: 80 + +# -- Expose the service via gateway-api HTTPRoute +# Requires Gateway API resources and suitable controller installed within the cluster +# (see: https://gateway-api.sigs.k8s.io/guides/) +httpRoute: + # HTTPRoute enabled. + enabled: false + # HTTPRoute annotations. + annotations: {} + # Which Gateways this Route is attached to. + parentRefs: + - name: gateway + sectionName: http + # namespace: default + # Hostnames matching HTTP header. + hostnames: + - chart-example.local + # List of rules and filters applied. + rules: + - matches: + - path: + type: PathPrefix + value: /headers + # filters: + # - type: RequestHeaderModifier + # requestHeaderModifier: + # set: + # - name: My-Overwrite-Header + # value: this-is-the-only-value + # remove: + # - User-Agent + # - matches: + # - path: + # type: PathPrefix + # value: /echo + # headers: + # - name: version + # value: v2 + +resources: + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. 
If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + #limits: + # cpu: 100m + # memory: 128Mi + requests: + cpu: 100m + memory: 128Mi + +# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/ +#livenessProbe: +# httpGet: +# path: / +# port: http +#readinessProbe: +# httpGet: +# path: / +# port: http + +# This section is for setting up autoscaling more information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/ +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 1 + targetCPUUtilizationPercentage: 60 + targetMemoryUtilizationPercentage: 60 + +# Additional volumes on the output Deployment definition. +volumes: + - name: secrets + secret: + secretName: gslb-operator-secrets + optional: false + - name: data + emptyDir: {} + +# Additional volumeMounts on the output Deployment definition. 
+volumeMounts: + - name: secrets + mountPath: "/app/secrets" + readOnly: true + - name: data + mountPath: "/app/data" + readOnly: false + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +settings: + env: prod + sandbox: sandbox.lua + port: :3000 + poll_interval: 1m + gslb_updater: 127.0.0.1:9000 + +vault: + enable: true diff --git a/cmd/main.go b/cmd/main.go index 8cff1e2..bbd670b 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -10,23 +10,34 @@ import ( "syscall" "time" + "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/vitistack/gslb-operator/internal/api/handlers/failover" - spoofs "github.com/vitistack/gslb-operator/internal/api/handlers/spoofs" + "github.com/vitistack/gslb-operator/internal/api/handlers/spoofs" "github.com/vitistack/gslb-operator/internal/api/routes" "github.com/vitistack/gslb-operator/internal/config" "github.com/vitistack/gslb-operator/internal/dns" + "github.com/vitistack/gslb-operator/internal/dns/update" "github.com/vitistack/gslb-operator/internal/manager" - spoofsrepo "github.com/vitistack/gslb-operator/internal/repositories/spoof" + "github.com/vitistack/gslb-operator/internal/model" + "github.com/vitistack/gslb-operator/internal/repositories/service" "github.com/vitistack/gslb-operator/pkg/auth" "github.com/vitistack/gslb-operator/pkg/auth/jwt" "github.com/vitistack/gslb-operator/pkg/bslog" "github.com/vitistack/gslb-operator/pkg/lua" - apiContractSpoof "github.com/vitistack/gslb-operator/pkg/models/spoofs" "github.com/vitistack/gslb-operator/pkg/persistence/store/file" "github.com/vitistack/gslb-operator/pkg/rest/middleware" ) +var ( // injected at buildtime + version string + buildDate string +) + func main() { + bslog.Info("Running GSLB - Operator", + slog.String("version", version), + slog.String("build-date", buildDate), + ) cfg := config.GetInstance() // initialize lua execution environment @@ -34,25 +45,26 @@ func main() { bslog.Fatal("could not load lua configuration", slog.Any("reason", err)) } - // 
creating dns - handler objects - zoneFetcher := dns.NewZoneFetcherWithAutoPoll() - mgr := manager.NewManager( - manager.WithMinRunningWorkers(100), - manager.WithNonBlockingBufferSize(110), - ) - - spoofsFileStore, err := file.NewStore[apiContractSpoof.Spoof]("store.json") + serviceFileStore, err := file.NewStore[model.GSLBServiceGroup]("./data/store.json") if err != nil { bslog.Fatal("could not create persistent storage", slog.String("reason", err.Error())) } + svcRepo := service.NewServiceRepo(serviceFileStore) - spoofRepo := spoofsrepo.NewRepository(spoofsFileStore) - updater, err := dns.NewUpdater( - dns.UpdaterWithSpoofRepo(spoofRepo), + // creating dns - handler objects + zoneFetcher := dns.NewZoneFetcherWithAutoPoll() + mgr := manager.NewManager( + manager.WithMinRunningWorkers(80), + manager.WithNonBlockingBufferSize(50), + manager.WithServiceRepository(svcRepo), + //manager.WithDryRun(true), ) + + updater, err := update.NewDNSDISTUpdater(serviceFileStore) if err != nil { bslog.Fatal("unable to create updater", slog.String("error", err.Error())) } + dnsHandler := dns.NewHandler( zoneFetcher, mgr, @@ -60,14 +72,24 @@ func main() { ) background := context.Background() - dnsHandler.Start(context.WithCancel(background)) + ctx, cancel := context.WithCancel(background) + dnsHandler.Start(ctx, cancel) + updater.Synchronize(ctx) + + //configs := getRandomGSLBConfig() + //for _, cfg := range configs { + // _, err := mgr.RegisterService(cfg) + // if err != nil { + // bslog.Fatal("could not create service", slog.String("reason", err.Error())) + // } + //} api := http.NewServeMux() // routes handlers - spoofsApiService := spoofs.NewSpoofsService(spoofRepo, mgr) + spoofsApiService := spoofs.NewSpoofsService(serviceFileStore, mgr) - failoverApiService := failover.NewFailoverService(spoofRepo, mgr) + failoverApiService := failover.NewFailoverService(mgr) // initializing the service jwt self signer jwt.InitServiceTokenManager(cfg.JWT().Secret(), cfg.JWT().User()) @@ -109,6 
+131,9 @@ func main() { middleware.WithIncomingRequestLogging(slog.Default()), )(spoofsApiService.DeleteOverride)) + // metrics + api.Handle(routes.METRICS, promhttp.Handler()) + server := http.Server{ Addr: cfg.API().Port(), Handler: api, @@ -131,12 +156,37 @@ func main() { case <-quit: bslog.Info("gracefully shutting down...") } - - shutdown, cancel := context.WithTimeout(background, time.Second*5) + + shutdown, cancel := context.WithTimeout(background, time.Second*20) defer cancel() - + dnsHandler.Stop(shutdown) if err := server.Shutdown(shutdown); err != nil { panic("error shutting down server: " + err.Error()) } } + +//func getRandomGSLBConfig() []model.GSLBConfig { +// configs := make([]model.GSLBConfig, 0, 500) +// +// cfg := model.GSLBConfig{ +// Fqdn: "test.example.com", +// Ip: "10.10.0.1", +// Port: "80", +// Datacenter: "DC1", +// Interval: timesutil.FromDuration(time.Second * 5), +// Priority: 1, +// FailureThreshold: 3, +// CheckType: checks.TCP_FULL, +// } +// +// for idx := range cap(configs) { +// +// cfg.ServiceID = fmt.Sprintf("%d", idx) +// cfg.MemberOf = fmt.Sprintf("%s.%s", cfg.ServiceID, cfg.Fqdn) +// +// configs = append(configs, cfg) +// } +// +// return configs +//} diff --git a/docker-compose.yaml b/docker-compose.yaml index 764edf1..fbeede4 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -7,6 +7,9 @@ services: # - ./policies:/policies opa: image: openpolicyagent/opa:0.60.0 + profiles: + - "opa" + - "all" command: - "run" - "--server" @@ -25,3 +28,51 @@ services: interval: 10s timeout: 5s retries: 3 + + prometheus: + image: prom/prometheus:latest + profiles: + - "monitoring" + - "all" + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--web.console.libraries=/usr/share/prometheus/console_libraries' + - '--web.console.templates=/usr/share/prometheus/consoles' + ports: + - "9090:9090" + volumes: + - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - 
prometheus-data:/prometheus + restart: unless-stopped + healthcheck: + test: ["CMD", "wget", "--spider", "-q", "http://localhost:9090/-/healthy"] + interval: 10s + timeout: 5s + retries: 3 + + grafana: + image: grafana/grafana:latest + profiles: + - "monitoring" + - "all" + ports: + - "3000:3000" + environment: + - GF_SECURITY_ADMIN_PASSWORD=admin + - GF_USERS_ALLOW_SIGN_UP=false + volumes: + - grafana-data:/var/lib/grafana + - ./monitoring/grafana/provisioning:/etc/grafana/provisioning:ro + depends_on: + - prometheus + restart: unless-stopped + healthcheck: + test: ["CMD", "wget", "--spider", "-q", "http://localhost:3000/api/health"] + interval: 10s + timeout: 5s + retries: 3 + +volumes: + prometheus-data: + grafana-data: \ No newline at end of file diff --git a/examples/prometheus.ql b/examples/prometheus.ql new file mode 100644 index 0000000..acf421e --- /dev/null +++ b/examples/prometheus.ql @@ -0,0 +1,17 @@ +# worker-pool size +worker_pool_size_total + +# number of service groups +service_groups_total + +# number of registered services +sum(service_group_members) + +# number of health checks in the last +sum(increase(healthcheck_total[$__rate_interval])) + +# average health check duration towards each datacenter +sum by(datacenter) (rate(healthcheck_duration_ms_sum[5m])) / sum by(datacenter) (rate(healthcheck_duration_ms_count[5m])) + +# health check success rate percentage towards each datacenter +(sum by(datacenter) (rate(healthcheck_total{status="success"}[$__rate_interval]))) * 100 / (sum by(datacenter) (rate(healthcheck_total[$__rate_interval]))) diff --git a/go.mod b/go.mod index d3b6b67..36a1c4b 100644 --- a/go.mod +++ b/go.mod @@ -1,18 +1,28 @@ module github.com/vitistack/gslb-operator -go 1.25.0 +go 1.26 require ( - codeberg.org/miekg/dns v0.5.21 + codeberg.org/miekg/dns v0.6.48 github.com/golang-jwt/jwt/v5 v5.3.1 github.com/google/uuid v1.6.0 github.com/joho/godotenv v1.5.1 - github.com/tevino/tcp-shaker v0.0.0-20251020080735-c4094cd6c927 + 
github.com/prometheus/client_golang v1.23.2 + github.com/tevino/tcp-shaker v0.0.0-20260210162928-fb888f26451b github.com/yuin/gopher-lua v1.1.1 - golang.org/x/crypto v0.43.0 + golang.org/x/crypto v0.48.0 ) require ( - golang.org/x/net v0.46.0 // indirect - golang.org/x/sys v0.37.0 // indirect + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/kr/text v0.2.0 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/prometheus/client_model v0.6.2 // indirect + github.com/prometheus/common v0.67.5 // indirect + github.com/prometheus/procfs v0.19.2 // indirect + go.yaml.in/yaml/v2 v2.4.3 // indirect + golang.org/x/net v0.50.0 // indirect + golang.org/x/sys v0.41.0 // indirect + google.golang.org/protobuf v1.36.11 // indirect ) diff --git a/go.sum b/go.sum index 2791e99..3809fd9 100644 --- a/go.sum +++ b/go.sum @@ -1,18 +1,58 @@ -codeberg.org/miekg/dns v0.5.21 h1:O+Ibq9IJuOeMoBnNmYdQmKJ7J9zgEsUqcbBhjsSrzIc= -codeberg.org/miekg/dns v0.5.21/go.mod h1:Q10KolpjjNhl9x14KdKA3s+7Xynb8Zqvjj9jWyzrYRA= +codeberg.org/miekg/dns v0.6.48 h1:+RZiJMKPq5BYjePB7AfTv7O+qf/3Kjsz9C4WmOUHdoA= +codeberg.org/miekg/dns v0.6.48/go.mod h1:fIxAzBMDPnXWSw0fp8+pfZMRiAqYY4+HHYLzUo/S6Dg= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY= github.com/golang-jwt/jwt/v5 
v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= -github.com/tevino/tcp-shaker v0.0.0-20251020080735-c4094cd6c927 h1:BdtSwzS6fNIAC3Ylj3x/ak6PD4EV885gGhWR7eIplEI= -github.com/tevino/tcp-shaker v0.0.0-20251020080735-c4094cd6c927/go.mod h1:S0VUAF1puvgOrlSQqCrJiz2t7yn2gPKYSpGu4+w8eg0= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.67.5 h1:pIgK94WWlQt1WLwAC5j2ynLaBRDiinoAb86HZHTUGI4= 
+github.com/prometheus/common v0.67.5/go.mod h1:SjE/0MzDEEAyrdr5Gqc6G+sXI67maCxzaT3A2+HqjUw= +github.com/prometheus/procfs v0.19.2 h1:zUMhqEW66Ex7OXIiDkll3tl9a1ZdilUOd/F6ZXw4Vws= +github.com/prometheus/procfs v0.19.2/go.mod h1:M0aotyiemPhBCM0z5w87kL22CxfcH05ZpYlu+b4J7mw= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/tevino/tcp-shaker v0.0.0-20260210162928-fb888f26451b h1:vmeHwA9U5lODKqvdZQxKqy+i1Q2yMwShjxytoszeWmw= +github.com/tevino/tcp-shaker v0.0.0-20260210162928-fb888f26451b/go.mod h1:bNnAwCfoEQXR47eBqFYS9fD6qTcY3t5ZUUgBZskRdcY= github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M= github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw= -golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04= -golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0= -golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4= -golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= -golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= -golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= +go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= +golang.org/x/crypto v0.48.0 h1:/VRzVqiRSggnhY7gNRxPauEQ5Drw9haKdM0jqfcCFts= +golang.org/x/crypto v0.48.0/go.mod h1:r0kV5h3qnFPlQnBSrULhlsRfryS2pmewsg+XfMgkVos= 
+golang.org/x/net v0.50.0 h1:ucWh9eiCGyDR3vtzso0WMQinm2Dnt8cFMuQa9K33J60= +golang.org/x/net v0.50.0/go.mod h1:UgoSli3F/pBgdJBHCTc+tp3gmrU4XswgGRgtnwWTfyM= +golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= +golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= +google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/api/handlers/auth/auth.go b/internal/api/handlers/auth/auth.go index 60632f7..9582994 100644 --- a/internal/api/handlers/auth/auth.go +++ b/internal/api/handlers/auth/auth.go @@ -1,5 +1,4 @@ package auth type AuthService struct { - -} \ No newline at end of file +} diff --git a/internal/api/handlers/failover/failover.go b/internal/api/handlers/failover/failover.go index 7f48014..f58baa2 100644 --- a/internal/api/handlers/failover/failover.go +++ b/internal/api/handlers/failover/failover.go @@ -5,7 +5,6 @@ import ( "net/http" "github.com/vitistack/gslb-operator/internal/manager" - "github.com/vitistack/gslb-operator/internal/repositories/spoof" "github.com/vitistack/gslb-operator/pkg/bslog" "github.com/vitistack/gslb-operator/pkg/models/failover" "github.com/vitistack/gslb-operator/pkg/rest/request" @@ -13,13 +12,11 @@ import ( ) type FailoverService struct { - spoofRepo *spoof.Repository serviceManager manager.QueryManager } -func NewFailoverService(repo *spoof.Repository, mgr manager.QueryManager) 
*FailoverService { +func NewFailoverService(mgr manager.QueryManager) *FailoverService { return &FailoverService{ - spoofRepo: repo, serviceManager: mgr, } } diff --git a/internal/api/handlers/spoofs/overrides.go b/internal/api/handlers/spoofs/overrides.go index f6430ad..799234a 100644 --- a/internal/api/handlers/spoofs/overrides.go +++ b/internal/api/handlers/spoofs/overrides.go @@ -13,6 +13,8 @@ import ( "log/slog" "net/http" + "github.com/vitistack/gslb-operator/internal/api/routes" + "github.com/vitistack/gslb-operator/internal/model" spoofRepo "github.com/vitistack/gslb-operator/internal/repositories/spoof" "github.com/vitistack/gslb-operator/pkg/bslog" "github.com/vitistack/gslb-operator/pkg/models/spoofs" @@ -22,23 +24,23 @@ import ( func (ss *SpoofsService) GetOverride(w http.ResponseWriter, r *http.Request) { logger := bslog.With(slog.Any("request_id", r.Context().Value("id"))) - fqdn := r.PathValue("fqdn") + memberOf := r.PathValue(routes.MemberOf) - if fqdn == "" { - logger.Error("skipping request due to insufficient input parameters", slog.String("reason", "missing fqdn")) - response.Err(w, response.ErrInvalidInput, "missing fqdn") + if memberOf == "" { + logger.Error("skipping request due to insufficient input parameters", slog.String("reason", "missing member-of")) + response.Err(w, response.ErrInvalidInput, "missing member-of") return } - exist, err := ss.SpoofRepo.ReadFQDN(fqdn) + exist, err := ss.svcRepo.GetActive(memberOf) if err != nil { logger.Error("could not read spoofs", slog.String("reason", err.Error())) response.Err(w, response.ErrInternalError, "") return } - if exist.DC != "OVERRIDE" { - logger.Error("service does not have an active override", slog.String("fqdn", exist.FQDN)) + if !exist.HasOverride { + logger.Error("service does not have an active override", slog.String("memberOf", exist.MemberOf)) response.Err(w, response.ErrNotFound, "not an active override") return } @@ -63,8 +65,8 @@ func (ss *SpoofsService) CreateOverride(w 
http.ResponseWriter, r *http.Request) err = ss.newOverride(override) if err != nil { logger.Error("could not override spoof", slog.String("reason", err.Error())) - if errors.Is(err, spoofRepo.ErrSpoofWithFQDNNotFound) { - response.Err(w, response.ErrNotFound, "fqdn not found: "+override.FQDN) + if errors.Is(err, spoofRepo.ErrSpoofInServiceGroupNotFound) { + response.Err(w, response.ErrNotFound, "group: "+override.MemberOf) return } @@ -89,8 +91,8 @@ func (ss *SpoofsService) UpdateOverride(w http.ResponseWriter, r *http.Request) err = ss.updateOverride(override) if err != nil { logger.Error("could not update spoof", slog.String("reason", err.Error())) - if errors.Is(err, spoofRepo.ErrSpoofWithFQDNNotFound) { - response.Err(w, response.ErrNotFound, "fqdn not found: "+override.FQDN) + if errors.Is(err, spoofRepo.ErrSpoofInServiceGroupNotFound) { + response.Err(w, response.ErrNotFound, "group: "+override.MemberOf) return } @@ -123,93 +125,75 @@ func (ss *SpoofsService) DeleteOverride(w http.ResponseWriter, r *http.Request) } func (ss *SpoofsService) newOverride(override spoofs.Override) error { - exist, err := ss.SpoofRepo.ReadFQDN(override.FQDN) + exist, err := ss.svcRepo.GetActive(override.MemberOf) if err != nil { - return fmt.Errorf("unable to read spoofs from storage: %w", err) + return fmt.Errorf("unable to get active service for group: %s: %w", override.MemberOf, err) } - if exist.DC == "OVERRIDE" { - return fmt.Errorf("service already has active override: %s", exist.FQDN) + if exist.HasOverride { + return fmt.Errorf("service already has active override: %s", exist.MemberOf) } - err = ss.SpoofRepo.Delete(exist.Key()) - if err != nil { - return fmt.Errorf("could not delete old spoof: %w", err) - } - - exist.DC = "OVERRIDE" exist.IP = override.IP.String() + exist.HasOverride = true - err = ss.SpoofRepo.Create(exist.Key(), &exist) + err = ss.svcRepo.Update(&exist) if err != nil { - return fmt.Errorf("could not create spoof: %w", err) + return fmt.Errorf("failed to 
update GSLB service with override flag: %w", err) } return nil } func (ss *SpoofsService) updateOverride(override spoofs.Override) error { - exist, err := ss.SpoofRepo.ReadFQDN(override.FQDN) + active, err := ss.svcRepo.GetActive(override.MemberOf) if err != nil { - return fmt.Errorf("unable to read spoofs from storage: %w", err) + return fmt.Errorf("unable to get active service for group: %s: %w", override.MemberOf, err) } - if exist.DC != "OVERRIDE" { - return fmt.Errorf("%s does not have an active override", override.FQDN) + if active.HasOverride { + return fmt.Errorf("service already has active override: %s", active.MemberOf) } - exist.IP = override.IP.String() + active.IP = override.IP.String() - err = ss.SpoofRepo.Update(exist.Key(), &exist) + err = ss.svcRepo.UpdateOverride(override.IP.String(), &active) if err != nil { - return fmt.Errorf("could not update spoof: %w", err) + return fmt.Errorf("failed to update GSLB service with override flag: %w", err) } return nil } func (ss *SpoofsService) deleteOverride(override spoofs.Override) error { - exist, err := ss.SpoofRepo.ReadFQDN(override.FQDN) + exist, err := ss.svcRepo.GetActive(override.MemberOf) if err != nil { - return fmt.Errorf("unable to read spoofs from storage: %w", err) + return fmt.Errorf("unable to get active service for group: %s: %w", override.MemberOf, err) } - if exist.DC != "OVERRIDE" { - return fmt.Errorf("%s does not have an override currently set", override.FQDN) + if !exist.HasOverride { + return fmt.Errorf("%s does not have an override currently set", override.MemberOf) } - spoof := ss.restoreSpoof(override) - err = ss.SpoofRepo.Delete(exist.Key()) + err = ss.svcRepo.RemoveOverrideFlag(override.MemberOf) if err != nil { - return fmt.Errorf("could not update spoof: %w", err) - } - - if spoof == nil { // if not possible to create new spoof, we return with NO spoof for the fqdn - return nil + return fmt.Errorf("failed to remove override flag: %w", err) } - err = 
ss.SpoofRepo.Create(spoof.Key(), spoof) + active := ss.restoreActive(override) + err = ss.svcRepo.Update(active) if err != nil { - return fmt.Errorf("could not create spoof for active service: %w", err) + return fmt.Errorf("could not restore active service in group after override flag has been removed: %w", err) } return nil } -func (ss *SpoofsService) restoreSpoof(override spoofs.Override) *spoofs.Spoof { - svc := ss.serviceManager.GetActiveForFQDN(override.FQDN) +func (ss *SpoofsService) restoreActive(override spoofs.Override) *model.GSLBService { + svc := ss.serviceManager.GetActiveForMemberOf(override.MemberOf) if svc == nil { // no active service: e.g. no spoof should be there return nil } - ip, err := svc.GetIP() - if err != nil { - return nil - } - - return &spoofs.Spoof{ - FQDN: svc.Fqdn, - DC: svc.Datacenter, - IP: ip, - } + return svc.GSLBService() } diff --git a/internal/api/handlers/spoofs/service.go b/internal/api/handlers/spoofs/service.go index f32c06b..69a31ad 100644 --- a/internal/api/handlers/spoofs/service.go +++ b/internal/api/handlers/spoofs/service.go @@ -2,17 +2,22 @@ package spoofs import ( "github.com/vitistack/gslb-operator/internal/manager" + "github.com/vitistack/gslb-operator/internal/model" + "github.com/vitistack/gslb-operator/internal/repositories/service" "github.com/vitistack/gslb-operator/internal/repositories/spoof" + "github.com/vitistack/gslb-operator/pkg/persistence" ) type SpoofsService struct { - SpoofRepo *spoof.Repository + svcRepo *service.ServiceRepo + spoofRepo *spoof.SpoofRepo serviceManager manager.QueryManager } -func NewSpoofsService(repo *spoof.Repository, svcManager manager.QueryManager) *SpoofsService { +func NewSpoofsService(store persistence.Store[model.GSLBServiceGroup], svcManager manager.QueryManager) *SpoofsService { return &SpoofsService{ - SpoofRepo: repo, + svcRepo: service.NewServiceRepo(store), + spoofRepo: spoof.NewSpoofRepo(store), // create read-only serviceManager: svcManager, } } diff --git 
a/internal/api/handlers/spoofs/spoofs.go b/internal/api/handlers/spoofs/spoofs.go index 3f93f42..ae392c0 100644 --- a/internal/api/handlers/spoofs/spoofs.go +++ b/internal/api/handlers/spoofs/spoofs.go @@ -17,7 +17,7 @@ import ( ) func (ss *SpoofsService) GetSpoofs(w http.ResponseWriter, r *http.Request) { - data, err := ss.SpoofRepo.ReadAll() + data, err := ss.spoofRepo.ReadAll() if err != nil { response.Err(w, response.ErrInternalError, "unable to fetch spoofs from storage") bslog.Error("Unable to fetch spoofs", slog.String("reason", err.Error())) @@ -43,7 +43,7 @@ func (ss *SpoofsService) GetFQDNSpoof(w http.ResponseWriter, r *http.Request) { return } - spoof, err := ss.SpoofRepo.Read(fqdn) + spoof, err := ss.spoofRepo.Read(fqdn) if err != nil { msg := "unable to fetch spoof with id: " + fqdn + " from storage" response.Err(w, response.ErrInternalError, msg) @@ -55,7 +55,7 @@ func (ss *SpoofsService) GetFQDNSpoof(w http.ResponseWriter, r *http.Request) { } func (ss *SpoofsService) GetSpoofsHash(w http.ResponseWriter, r *http.Request) { - data, err := ss.SpoofRepo.ReadAll() + data, err := ss.spoofRepo.ReadAll() if err != nil { response.Err(w, response.ErrInternalError, "unable to fetch spoofs from storage") bslog.Error("unable to read spoofs from storage", slog.String("reason", err.Error())) diff --git a/internal/api/routes/const.go b/internal/api/routes/const.go index 177a702..60f2412 100644 --- a/internal/api/routes/const.go +++ b/internal/api/routes/const.go @@ -1,6 +1,8 @@ package routes -import "net/http" +import ( + "net/http" +) const ( ROOT = "/" @@ -13,9 +15,9 @@ const ( GET_SPOOFS_HASH = http.MethodGet + " " + SPOOFS_HASH // Route to hash all spoofs, for config validation POST_SPOOF = http.MethodPost + " " + SPOOFS // Route POST - OVERRIDE = SPOOFS + "/override" // override DNSDIST configuration - GET_OVERRIDE = http.MethodGet + " " + OVERRIDE + "/{fqdn}" // Route GET - POST_OVERRIDE = http.MethodPost + " " + OVERRIDE // Route POST + OVERRIDE = SPOOFS + 
"/override" // override DNSDIST configuration + GET_OVERRIDE = http.MethodGet + " " + OVERRIDE + "/{" + MemberOf + "}" // Route GET + POST_OVERRIDE = http.MethodPost + " " + OVERRIDE // Route POST PUT_OVERRIDE = http.MethodPut + " " + OVERRIDE + "/{fqdn}" DELETE_OVERRIDE = http.MethodDelete + " " + OVERRIDE // Route DELETE @@ -25,4 +27,11 @@ const ( AUTH = ROOT + "auth" AUTH_LOGIN = AUTH + "/login" POST_AUTH_LOGIN = http.MethodPost + " " + AUTH_LOGIN + + METRICS = ROOT + "metrics" + GET_METRICS = http.MethodGet + " " + METRICS +) + +const ( + MemberOf = "memberOf" ) diff --git a/internal/checks/checker.go b/internal/checks/checker.go index a47a174..28ec341 100644 --- a/internal/checks/checker.go +++ b/internal/checks/checker.go @@ -1,6 +1,8 @@ package checks +import "time" + type Checker interface { Check() error + Roundtrip() time.Duration } - diff --git a/internal/checks/dryrun.go b/internal/checks/dryrun.go index 177f920..45f3daa 100644 --- a/internal/checks/dryrun.go +++ b/internal/checks/dryrun.go @@ -3,14 +3,22 @@ package checks import ( "errors" "math/rand" + "time" ) type DryRun struct{} func (dr *DryRun) Check() error { + + sleepDuration := time.Duration(100+rand.Intn(400)) * time.Millisecond + time.Sleep(sleepDuration) num := rand.Intn(10) if num == 0 { // 10% failure when dryrunning return errors.New("dry-run fail") } return nil } + +func (dr *DryRun) Roundtrip() time.Duration { + return time.Duration(0) +} diff --git a/internal/checks/http.go b/internal/checks/http.go index 23b189b..783ddb1 100644 --- a/internal/checks/http.go +++ b/internal/checks/http.go @@ -8,6 +8,7 @@ import ( ) type HTTPChecker struct { + *RoundTripper url string client *http.Client validator *LuaValidator @@ -25,7 +26,8 @@ func NewHTTPChecker(url string, timeout time.Duration, validationScripts ...stri } return &HTTPChecker{ - url: url, + RoundTripper: NewRoundtripper(), + url: url, client: &http.Client{ Timeout: timeout, Transport: transport, @@ -35,7 +37,9 @@ func 
NewHTTPChecker(url string, timeout time.Duration, validationScripts ...stri } func (c *HTTPChecker) Check() error { + c.startRecording() resp, err := c.client.Get(c.url) + c.endRecording() if err != nil { return err } @@ -51,3 +55,7 @@ func (c *HTTPChecker) Check() error { resp.Body.Close() return nil } + +func (c *HTTPChecker) Roundtrip() time.Duration { + return c.AverageRoundtripTime() +} diff --git a/internal/checks/roundtrip.go b/internal/checks/roundtrip.go new file mode 100644 index 0000000..8efe431 --- /dev/null +++ b/internal/checks/roundtrip.go @@ -0,0 +1,60 @@ +package checks + +import ( + "sync" + "time" +) + +type RoundTripper struct { + mu sync.RWMutex + currentTripStart time.Time + roundtrips []time.Duration + roundtripIdx int // current index to populate + count int + roundtripCapacity int +} + +func NewRoundtripper() *RoundTripper { + return &RoundTripper{ + mu: sync.RWMutex{}, + roundtrips: make([]time.Duration, 20), + roundtripIdx: 0, + count: 0, + roundtripCapacity: 20, + } +} + +func (rt *RoundTripper) startRecording() { + rt.mu.Lock() + defer rt.mu.Unlock() + rt.currentTripStart = time.Now() +} + +func (rt *RoundTripper) endRecording() { + rt.mu.Lock() + defer rt.mu.Unlock() + + rt.roundtrips[rt.roundtripIdx] = time.Since(rt.currentTripStart) + rt.roundtripIdx = (rt.roundtripIdx + 1) % rt.roundtripCapacity + + if rt.count < rt.roundtripCapacity { + rt.count++ + } +} + +func (rt *RoundTripper) AverageRoundtripTime() time.Duration { + rt.mu.RLock() + defer rt.mu.RUnlock() + + if rt.count == 0 { + return time.Duration(0) + } + + var sum time.Duration + + for _, trip := range rt.roundtrips { + sum += trip + } + + return sum / time.Duration(rt.count) +} diff --git a/internal/checks/tcp.go b/internal/checks/tcp.go index 63deb89..959e548 100644 --- a/internal/checks/tcp.go +++ b/internal/checks/tcp.go @@ -9,10 +9,15 @@ import ( ) type TCPChecker struct { + *RoundTripper addr string timeout time.Duration } +func (c *TCPChecker) Roundtrip() 
time.Duration { + return c.AverageRoundtripTime() +} + type TCPFullChecker struct { TCPChecker } @@ -30,15 +35,18 @@ func NewTCPChecker(typ, addr string, timeout time.Duration) Checker { func NewTCPFullChecker(addr string, timeout time.Duration) Checker { return &TCPFullChecker{ - TCPChecker: TCPChecker{ - addr: addr, - timeout: timeout, + TCPChecker{ + RoundTripper: NewRoundtripper(), + addr: addr, + timeout: timeout, }, } } func (tf *TCPFullChecker) Check() error { + tf.startRecording() conn, err := net.DialTimeout("tcp", tf.addr, tf.timeout) + tf.endRecording() if err != nil { return err } @@ -53,8 +61,9 @@ type TCPHalfChecker struct { func NewTCPHalfChecker(addr string, timeout time.Duration) Checker { return &TCPHalfChecker{ TCPChecker{ - addr: addr, - timeout: timeout, + RoundTripper: NewRoundtripper(), + addr: addr, + timeout: timeout, }, } } @@ -62,7 +71,9 @@ func NewTCPHalfChecker(addr string, timeout time.Duration) Checker { func (th *TCPHalfChecker) Check() error { checker := tcpshaker.DefaultChecker() + th.startRecording() err := checker.CheckAddr(th.addr, th.timeout) + th.endRecording() if err != nil { if errors.Is(err, tcpshaker.ErrTimeout) { return err diff --git a/internal/config/config.go b/internal/config/config.go index c3ba99d..6018ba6 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -73,7 +73,6 @@ func (c *Config) JWT() *JWT { // Server configuration type Server struct { ENV string `env:"SRV_ENV" flag:"env"` - DC string `env:"SRV_DATACENTER" flag:"datacenter"` LUA_SANDBOX string `env:"SRV_LUA_SANDBOX" flag:"lua-sandbox"` } @@ -81,10 +80,6 @@ func (s *Server) Env() string { return s.ENV } -func (s *Server) Datacenter() string { - return s.DC -} - func (s *Server) LuaSandbox() string { return s.LUA_SANDBOX } @@ -104,6 +99,7 @@ type GSLB struct { NAMESERVER string `env:"GSLB_NAMESERVER" flag:"gslb-nameserver"` POLLINTERVAL string `env:"GSLB_POLL_INTERVAL" flag:"poll-interval"` UPDATERHOST string `env:"GSLB_UPDATER_HOST" 
flag:"updater-host"` + SERVERS string `env:"GSLB_DNSDIST_SERVERS_FILE"` } func (g *GSLB) Zone() string { @@ -127,6 +123,10 @@ func (g *GSLB) UpdaterHost() string { return g.UPDATERHOST } +func (g *GSLB) Servers() string { + return g.SERVERS +} + type JWT struct { SECRET string `env:"JWT_SECRET"` USER string `env:"JWT_USER"` @@ -141,9 +141,18 @@ func (jwt *JWT) User() string { } func newConfig() (*Config, error) { + fileLoader, err := loaders.NewFileLoader( + ".env", + "./secrets", + ) + + if err != nil { + return nil, err + } + loader := loaders.NewChainLoader( loaders.NewEnvloader(), - loaders.NewFileLoader(".env"), + fileLoader, loaders.NewFlagLoader(), ) @@ -154,7 +163,9 @@ func newConfig() (*Config, error) { apiCfg := API{ PORT: ":8080", } - gslbCfg := GSLB{} + gslbCfg := GSLB{ + POLLINTERVAL: "1m", + } jwtCfg := JWT{} configs := []any{ diff --git a/internal/dns/const.go b/internal/dns/const.go index 48206d0..418a071 100644 --- a/internal/dns/const.go +++ b/internal/dns/const.go @@ -2,4 +2,4 @@ package dns import "time" -const DEFAULT_POLL_INTERVAL = time.Minute * 5 \ No newline at end of file +const DEFAULT_POLL_INTERVAL = time.Minute * 5 diff --git a/internal/dns/handler.go b/internal/dns/handler.go index 23b8cb9..52fde59 100644 --- a/internal/dns/handler.go +++ b/internal/dns/handler.go @@ -8,6 +8,7 @@ import ( "sync" "codeberg.org/miekg/dns" + "github.com/vitistack/gslb-operator/internal/dns/update" "github.com/vitistack/gslb-operator/internal/manager" "github.com/vitistack/gslb-operator/internal/model" "github.com/vitistack/gslb-operator/internal/service" @@ -18,14 +19,14 @@ import ( type Handler struct { fetcher *ZoneFetcher // fetch GSLB config from dns svcManager *manager.ServicesManager - updater *Updater + updater update.Updater knownServices map[string]struct{} // service.ID: makes it easier to look up using map, but dont need a real value! 
stop chan struct{} cancel func() // cancels context wg sync.WaitGroup } -func NewHandler(fetcher *ZoneFetcher, mgr *manager.ServicesManager, updater *Updater) *Handler { +func NewHandler(fetcher *ZoneFetcher, mgr *manager.ServicesManager, updater update.Updater) *Handler { return &Handler{ fetcher: fetcher, svcManager: mgr, @@ -78,11 +79,17 @@ func (h *Handler) Stop(ctx context.Context) { } func (h *Handler) onServiceDown(svc *service.Service) { - h.updater.ServiceDown(svc) + err := h.updater.OnServiceDown(svc) + if err != nil { + bslog.Warn("error while updating service on service down", slog.String("error", err.Error())) + } } func (h *Handler) onServiceUp(svc *service.Service) { - h.updater.ServiceUp(svc) + err := h.updater.OnServiceUp(svc) + if err != nil { + bslog.Warn("error while updating service state on service up", slog.String("error", err.Error())) + } } func (h *Handler) handleZoneUpdates(zone <-chan []dns.RR, pollErrors <-chan error) { diff --git a/internal/dns/update/dnsdist.go b/internal/dns/update/dnsdist.go new file mode 100644 index 0000000..b6d7761 --- /dev/null +++ b/internal/dns/update/dnsdist.go @@ -0,0 +1,234 @@ +package update + +import ( + "bufio" + "cmp" + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "log/slog" + "os" + "regexp" + "slices" + "strings" + "sync" + "time" + + "github.com/vitistack/gslb-operator/internal/config" + "github.com/vitistack/gslb-operator/internal/model" + repo "github.com/vitistack/gslb-operator/internal/repositories/spoof" + "github.com/vitistack/gslb-operator/internal/service" + "github.com/vitistack/gslb-operator/pkg/bslog" + "github.com/vitistack/gslb-operator/pkg/dnsdist" + "github.com/vitistack/gslb-operator/pkg/models/spoofs" + "github.com/vitistack/gslb-operator/pkg/persistence" +) + +const DEFAULT_SYNCHRONIZE_JOB = time.Minute + +// contacts dnsdist servers to make update directly +type DNSDISTUpdater struct { + servers map[string]*dnsdist.Client + spoofRepo repo.SpoofRepo +} + 
+func NewDNSDISTUpdater(store persistence.Store[model.GSLBServiceGroup]) (*DNSDISTUpdater, error) { + updater := &DNSDISTUpdater{ + servers: make(map[string]*dnsdist.Client), + spoofRepo: *repo.NewSpoofRepo(store), + } + + file, err := os.ReadFile(config.GetInstance().GSLB().Servers()) + if err != nil { + return nil, fmt.Errorf("could not load dnsdist servers configuration: %w", err) + } + servers := []model.DNSDISTServer{} + err = json.Unmarshal(file, &servers) + if err != nil { + return nil, fmt.Errorf("malformed dnsdist servers configuration: %w", err) + } + + for _, server := range servers { + client, err := dnsdist.NewClient( + server.Key, + dnsdist.WithHost(server.Host.String()), + dnsdist.WithPort(server.Port), + dnsdist.WithTimeout(time.Second*5), + dnsdist.WithNumRetriesOnCommandFailure(3), + ) + + if err != nil { + return nil, fmt.Errorf("unable to create dnsdist client: %w", err) + } + + updater.servers[server.Name] = client + } + + err = updater.synchronizeServers() + if err != nil { + return updater, fmt.Errorf("failed synchronization on updater init: %w", err) + } + + return updater, nil +} + +func (d *DNSDISTUpdater) OnServiceUp(svc *service.Service) error { + + for _, client := range d.servers { + err := client.AddDomainSpoof(svc.MemberOf+":"+svc.Datacenter, svc.MemberOf, svc.GetIP()) + if err != nil { + return fmt.Errorf("could not create dnsdist-spoof: %w", err) + } + } + + return nil +} + +func (d *DNSDISTUpdater) OnServiceDown(svc *service.Service) error { + for _, client := range d.servers { + err := client.RmRuleWithName(svc.MemberOf + ":" + svc.Datacenter) + if err != nil { + return fmt.Errorf("could not remove dnsdist-spoof: %w", err) + } + } + return nil +} + +func (d *DNSDISTUpdater) Synchronize(ctx context.Context) { + go func() { + for { + select { + case <-ctx.Done(): + bslog.Info("stopping dnsdist - server synchronization") + + // close control socket connections + for _, client := range d.servers { + client.Disconnect() + } + + 
return
+ case <-time.After(DEFAULT_SYNCHRONIZE_JOB):
+ err := d.synchronizeServers()
+ if err != nil {
+ bslog.Error("unable to synchronize dnsdist - servers", slog.String("reason", err.Error()))
+ }
+ }
+ }
+ }()
+}
+
+func (d *DNSDISTUpdater) synchronizeServers() error {
+ desiredHash, err := d.spoofRepo.Hash()
+ if err != nil {
+ return fmt.Errorf("unable to get hash representation of spoofs: %w", err)
+ }
+
+ wg := sync.WaitGroup{}
+
+ for server, client := range d.servers {
+ wg.Go(func() {
+ rawRuleSet, err := client.ShowRules()
+ if err != nil {
+ bslog.Error("unable to fetch ruleset from dnsdist server", slog.String("reason", err.Error()))
+ return
+ }
+
+ data, err := d.ParseRuleSet(rawRuleSet)
+ if err != nil {
+ bslog.Error("could not synchronize dnsdist server", slog.String("reason", err.Error()))
+ return
+ }
+
+ slices.SortFunc(data, func(a, b spoofs.Spoof) int {
+ return cmp.Compare(fmt.Sprintf("%s:%s", a.FQDN, a.DC), fmt.Sprintf("%s:%s", b.FQDN, b.DC))
+ })
+
+ marshalledSpoofs, err := json.Marshal(data)
+ if err != nil {
+ bslog.Error("unable to marshall spoofs", slog.String("reason", err.Error()))
+ return
+ }
+
+ rawHash := sha256.Sum256(marshalledSpoofs) // creating bytes representation of spoofs
+ hash := hex.EncodeToString(rawHash[:])
+ if hash != desiredHash {
+ err := d.reconcileServer(client, data)
+ if err != nil {
+ bslog.Warn("failed to reconcile server", slog.String("server_name", server))
+ }
+ }
+ })
+ }
+
+ wg.Wait()
+
+ return nil
+}
+
+func (d *DNSDISTUpdater) ParseRuleSet(ruleSet string) ([]spoofs.Spoof, error) {
+ reader := strings.NewReader(ruleSet)
+ lines := bufio.NewScanner(reader)
+
+ pattern, err := regexp.Compile(`[a-zA-Z0-9._-]+:[A-Z0-9]+|spoof|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}`)
+ if err != nil {
+ return nil, fmt.Errorf("unable to compile regex: %w", err)
+ }
+
+ spoofRules := make([]spoofs.Spoof, 0)
+ for lines.Scan() {
+ line := lines.Text()
+ matches := pattern.FindAllString(line, -1)
+ if len(matches) < 3 {
+ continue
+ }
+ 
rule := dnsdist.Rule{
+ Name: matches[0],
+ Action: matches[1],
+ }
+
+ if rule.Action != "spoof" {
+ continue
+ }
+
+ spoofRules = append(spoofRules,
+ spoofs.Spoof{
+ FQDN: strings.Split(rule.Name, ":")[0],
+ DC: strings.Split(rule.Name, ":")[1],
+ IP: matches[2],
+ })
+ }
+
+ return spoofRules, nil
+}
+
+func (d *DNSDISTUpdater) reconcileServer(client *dnsdist.Client, configuredSpoofs []spoofs.Spoof) error {
+ gslbspoofs, err := d.spoofRepo.ReadAll()
+ if err != nil {
+ return fmt.Errorf("could not fetch spoofs: %w", err)
+ }
+
+ for _, spoof := range configuredSpoofs { // remove all spoofs that should not exist any more
+ if !slices.ContainsFunc(gslbspoofs, func(s spoofs.Spoof) bool {
+ return s.FQDN+":"+s.DC == spoof.FQDN+":"+spoof.DC
+ }) {
+ err := client.RmRuleWithName(spoof.FQDN + ":" + spoof.DC)
+ if err != nil {
+ return fmt.Errorf("could not remove spoof: %w", err)
+ }
+ }
+ }
+
+ for _, spoof := range gslbspoofs { // add all spoofs that do not exist but should
+ if !slices.ContainsFunc(configuredSpoofs, func(s spoofs.Spoof) bool {
+ return s.FQDN+":"+s.DC == spoof.FQDN+":"+spoof.DC
+ }) {
+ err := client.AddDomainSpoof(spoof.FQDN+":"+spoof.DC, spoof.FQDN, spoof.IP)
+ if err != nil {
+ return fmt.Errorf("could not add spoof: %w", err)
+ }
+ }
+ }
+
+ return nil
+}
diff --git a/internal/dns/updater.go b/internal/dns/update/rest.go
similarity index 50%
rename from internal/dns/updater.go
rename to internal/dns/update/rest.go
index ea8bb06..e49a810 100644
--- a/internal/dns/updater.go
+++ b/internal/dns/update/rest.go
@@ -1,4 +1,6 @@
-package dns
+package update
+
+// sends HTTP request to update dns

 import (
 "fmt"
@@ -7,27 +9,23 @@ import (
 "time"

 "github.com/vitistack/gslb-operator/internal/config"
- "github.com/vitistack/gslb-operator/internal/repositories/spoof"
 "github.com/vitistack/gslb-operator/internal/service"
 "github.com/vitistack/gslb-operator/pkg/auth/jwt"
- "github.com/vitistack/gslb-operator/pkg/bslog"
 
"github.com/vitistack/gslb-operator/pkg/models/spoofs" - "github.com/vitistack/gslb-operator/pkg/persistence/store/memory" "github.com/vitistack/gslb-operator/pkg/rest/request" "github.com/vitistack/gslb-operator/pkg/rest/request/client" ) -type updaterOption func(u *Updater) +type updaterOption func(u *RESTUpdater) -type Updater struct { - Server string - spoofRepo *spoof.Repository - client client.HTTPClient - builder *request.Builder - mu *sync.Mutex +type RESTUpdater struct { + Server string + client client.HTTPClient + builder *request.Builder + mu *sync.Mutex } -func NewUpdater(opts ...updaterOption) (*Updater, error) { +func NewUpdater(opts ...updaterOption) (*RESTUpdater, error) { c, err := client.NewClient( time.Second*5, client.WithRetry(3), @@ -38,11 +36,10 @@ func NewUpdater(opts ...updaterOption) (*Updater, error) { return nil, fmt.Errorf("unable to create http client: %s", err.Error()) } - u := &Updater{ - Server: config.GetInstance().GSLB().UpdaterHost(), - spoofRepo: spoof.NewRepository(memory.NewStore[spoofs.Spoof]()), - client: *c, - mu: &sync.Mutex{}, + u := &RESTUpdater{ + Server: config.GetInstance().GSLB().UpdaterHost(), + client: *c, + mu: &sync.Mutex{}, } for _, opt := range opts { @@ -53,42 +50,19 @@ func NewUpdater(opts ...updaterOption) (*Updater, error) { return u, nil } -func UpdaterWithSpoofRepo(spoofRep *spoof.Repository) updaterOption { - return func(u *Updater) { - u.spoofRepo = spoofRep - } -} - func UpdaterWithServer(server string) updaterOption { - return func(u *Updater) { + return func(u *RESTUpdater) { u.Server = server } } func UpdaterWithClient(client *client.HTTPClient) updaterOption { - return func(u *Updater) { + return func(u *RESTUpdater) { u.client = *client } } -func (u *Updater) ServiceDown(svc *service.Service) error { - u.mu.Lock() - override, err := u.spoofRepo.HasOverride(svc.MemberOf) - if err != nil { - return fmt.Errorf("unable to delete spoof: %w", err) - } - - if override { - bslog.Debug("service has spoof 
active override", slog.Any("service", svc)) - return nil - } - - err = u.spoofRepo.Delete(fmt.Sprintf("%s:%s", svc.MemberOf, svc.Datacenter)) - u.mu.Unlock() - if err != nil { - return fmt.Errorf("unable to delete service from storage: %s", err.Error()) - } - +func (u *RESTUpdater) ServiceDown(svc *service.Service) error { token, err := jwt.GetInstance().GetServiceToken() if err != nil { return fmt.Errorf("could not fetch service token: %w", err) @@ -114,35 +88,7 @@ func (u *Updater) ServiceDown(svc *service.Service) error { return nil } -func (u *Updater) ServiceUp(svc *service.Service) error { - u.mu.Lock() - override, err := u.spoofRepo.HasOverride(svc.MemberOf) - if err != nil { - return fmt.Errorf("unable to store spoof: %w", err) - } - - if override { - bslog.Debug("service has spoof active override", slog.Any("service", svc)) - return nil - } - - ip, err := svc.GetIP() - if err != nil { - return fmt.Errorf("unable to get ip address: %s", err.Error()) - } - - spoof := &spoofs.Spoof{ - FQDN: svc.MemberOf, - IP: ip, - DC: svc.Datacenter, - } - - err = u.spoofRepo.Create(fmt.Sprintf("%s:%s", svc.MemberOf, svc.Datacenter), spoof) - u.mu.Unlock() - if err != nil { - return fmt.Errorf("could not store new spoof: %s", err.Error()) - } - +func (u *RESTUpdater) ServiceUp(svc *service.Service) error { token, err := jwt.GetInstance().GetServiceToken() if err != nil { return fmt.Errorf("could not fetch service token: %w", err) @@ -150,7 +96,11 @@ func (u *Updater) ServiceUp(svc *service.Service) error { req, err := u.builder.POST().SetHeader("Authorization", token). URL("/spoofs"). - Body(spoof). + Body(spoofs.Spoof{ + FQDN: svc.MemberOf, + IP: svc.GetIP(), + DC: svc.Datacenter, + }). 
Build() if err != nil { return fmt.Errorf("could not create post request for update: %s", err.Error()) diff --git a/internal/dns/update/updater.go b/internal/dns/update/updater.go new file mode 100644 index 0000000..c76bb2c --- /dev/null +++ b/internal/dns/update/updater.go @@ -0,0 +1,8 @@ +package update + +import "github.com/vitistack/gslb-operator/internal/service" + +type Updater interface { + OnServiceUp(*service.Service) error + OnServiceDown(*service.Service) error +} diff --git a/internal/manager/healthcheck/healtheck.go b/internal/manager/healthcheck/healtheck.go new file mode 100644 index 0000000..5891ba6 --- /dev/null +++ b/internal/manager/healthcheck/healtheck.go @@ -0,0 +1,53 @@ +package healthcheck + +import ( + "log/slog" + "time" + + "github.com/vitistack/gslb-operator/internal/service" + "github.com/vitistack/gslb-operator/pkg/bslog" +) + +type HealthCheckJob struct { + Service *service.Service + lastCheck time.Time +} + +func NewJob(svc *service.Service) *HealthCheckJob { + return &HealthCheckJob{ + Service: svc, + } +} + +func (hj *HealthCheckJob) Execute() error { + hj.lastCheck = time.Now() + err := hj.Service.Execute() + + checkTimeMs := float64(time.Since(hj.lastCheck).Milliseconds()) + + bslog.Debug("check complete", slog.Float64("duration_ms", checkTimeMs)) + healthCheckDuration.WithLabelValues( + hj.Service.MemberOf, + hj.Service.Fqdn, + hj.Service.Datacenter). + Observe(checkTimeMs) + return err +} + +func (hj *HealthCheckJob) OnSuccess() { + healthChecksTotal.WithLabelValues(hj.Service.MemberOf, + hj.Service.Fqdn, + hj.Service.Datacenter, + "success"). + Inc() + hj.Service.OnSuccess() +} + +func (hj *HealthCheckJob) OnFailure(err error) { + healthChecksTotal.WithLabelValues(hj.Service.MemberOf, + hj.Service.Fqdn, + hj.Service.Datacenter, + "failure"). 
+ Inc() + hj.Service.OnFailure(err) +} diff --git a/internal/manager/healthcheck/metrics.go b/internal/manager/healthcheck/metrics.go new file mode 100644 index 0000000..f4aa0f8 --- /dev/null +++ b/internal/manager/healthcheck/metrics.go @@ -0,0 +1,26 @@ +package healthcheck + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var ( + healthChecksTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "healthcheck_total", + Help: "Total health checks performed", + }, + []string{"memberOf", "endpoint", "datacenter", "status"}, + ) + + healthCheckDuration = promauto.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "healthcheck_duration_ms", + Help: "Health check duration", + Buckets: []float64{1, 5, 25, 50, 100, 250, 500, 1000, 2500, 5000}, + }, + []string{"memberOf", "endpoint", "datacenter"}, + ) +) + diff --git a/internal/manager/manager.go b/internal/manager/manager.go index 92ce252..b348b9f 100644 --- a/internal/manager/manager.go +++ b/internal/manager/manager.go @@ -7,12 +7,15 @@ import ( "sync" "time" + "github.com/vitistack/gslb-operator/internal/manager/healthcheck" "github.com/vitistack/gslb-operator/internal/manager/scheduler" "github.com/vitistack/gslb-operator/internal/model" + svcRepo "github.com/vitistack/gslb-operator/internal/repositories/service" "github.com/vitistack/gslb-operator/internal/service" "github.com/vitistack/gslb-operator/internal/utils/timesutil" "github.com/vitistack/gslb-operator/pkg/bslog" "github.com/vitistack/gslb-operator/pkg/models/failover" + "github.com/vitistack/gslb-operator/pkg/persistence/store/memory" "github.com/vitistack/gslb-operator/pkg/pool" ) @@ -22,9 +25,10 @@ type ServicesManager struct { scheduledServices ScheduledServices // services that are scheduled on an interval schedulers map[timesutil.Duration]*scheduler.Scheduler // schedulers for health-checks serviceGroups map[string]*ServiceGroup + svcRepo *svcRepo.ServiceRepo 
mutex sync.RWMutex stop sync.Once - pool pool.WorkerPool + pool *pool.WorkerPool wg *sync.WaitGroup // schedulers use this when scheduling services asynchronously DNSUpdate func(*service.Service, bool) dryrun bool @@ -33,8 +37,9 @@ type ServicesManager struct { func NewManager(opts ...serviceManagerOption) *ServicesManager { cfg := managerConfig{ MinRunningWorkers: 100, - NonBlockingBufferSize: 110, + NonBlockingBufferSize: 100, DryRun: false, + repo: svcRepo.NewServiceRepo(memory.NewStore[model.GSLBServiceGroup]()), } for _, opt := range opts { @@ -45,12 +50,23 @@ func NewManager(opts ...serviceManagerOption) *ServicesManager { bslog.Warn("dry-run enabled") } + pool := pool.NewWorkerPool(cfg.MinRunningWorkers, cfg.NonBlockingBufferSize) + pool.OnScaleUp = func() { + bslog.Debug("worker-pool on scale up", slog.Int("numWorkers", int(pool.NumWorkers()))) + workerPoolSize.Inc() + } + pool.OnScaleDown = func() { + bslog.Debug("worker-pool on scale down", slog.Int("numWorkers", int(pool.NumWorkers()))) + workerPoolSize.Dec() + } + return &ServicesManager{ scheduledServices: make(ScheduledServices), schedulers: make(map[timesutil.Duration]*scheduler.Scheduler), serviceGroups: make(map[string]*ServiceGroup), + svcRepo: cfg.repo, mutex: sync.RWMutex{}, - pool: *pool.NewWorkerPool(cfg.MinRunningWorkers, cfg.NonBlockingBufferSize), + pool: pool, stop: sync.Once{}, wg: &sync.WaitGroup{}, dryrun: cfg.DryRun, @@ -64,20 +80,57 @@ func (sm *ServicesManager) Start() { } func (sm *ServicesManager) Stop() { - sm.pool.Stop() sm.stop.Do(func() { - for interval, scheduler := range sm.schedulers { + for _, scheduler := range sm.schedulers { scheduler.Stop() - bslog.Debug("scheduler closed", slog.String("interval", interval.String())) } - + bslog.Debug("waiting for schedulers to stop") sm.wg.Wait() + + bslog.Debug("schedulers stopped - closing pool") + sm.pool.Stop() + err := sm.OnShutdown() + if err != nil { + bslog.Error("error while performing shutdown tasks", slog.String("error", 
err.Error())) + } bslog.Debug("service manager closed") }) } +func (sm *ServicesManager) OnShutdown() error { + sm.mutex.Lock() + defer sm.mutex.Unlock() + bslog.Debug("executing manager.OnShutdown()") + + for memberOf, group := range sm.serviceGroups { + active := group.GetActive() + + for _, svc := range group.Members { + gslbService := svc.GSLBService() + + gslbService.IsActive = (active != nil && active.GetID() == svc.GetID()) + override, err := sm.svcRepo.HasOverride(memberOf) + if err != nil { + return fmt.Errorf("unable to check whether service group has active override: member-of: %s: %w", memberOf, err) + } + gslbService.HasOverride = override + + err = sm.svcRepo.Update(gslbService) + if err != nil { + return fmt.Errorf("failed to persist service state: service: %v: %w", svc, err) + } + } + } + + return nil +} + func (sm *ServicesManager) RegisterService(serviceCfg model.GSLBConfig) (*service.Service, error) { - newService, err := service.NewServiceFromGSLBConfig(serviceCfg, sm.dryrun) // create the service object + opts := sm.BuildServiceOptions(serviceCfg) + newService, err := service.NewServiceFromGSLBConfig( // create the service object + serviceCfg, + opts..., + ) if err != nil { return nil, fmt.Errorf("unable to register service: %s", err.Error()) } @@ -94,9 +147,22 @@ func (sm *ServicesManager) RegisterService(serviceCfg model.GSLBConfig) (*servic sm.mutex.Lock() defer sm.mutex.Unlock() + err = sm.svcRepo.Create(newService.GSLBService()) + if err != nil { + return nil, fmt.Errorf("failed to create new service: %w", err) + } + // set healthchange callback action newService.SetHealthChangeCallback(func(healthy bool) { bslog.Debug("received health-change", slog.Any("service", newService), slog.Bool("healthy", healthy)) + err := sm.svcRepo.Update(newService.GSLBService()) + if err != nil { + bslog.Error( + "failed to update service health on health-change", + slog.String("reason", err.Error()), + slog.Any("service", newService), + ) + } 
sm.serviceGroups[newService.MemberOf].OnServiceHealthChange(newService, healthy) }) @@ -134,16 +200,22 @@ func (sm *ServicesManager) RemoveService(id string) error { sm.mutex.RLock() group := sm.serviceGroups[svc.MemberOf] sm.mutex.RUnlock() + empty := group.RemoveService(svc.GetID()) // registered in group if empty { - delete(sm.serviceGroups, svc.MemberOf) + sm.deleteGroup(svc.MemberOf) } sm.mutex.Lock() defer sm.mutex.Unlock() + sm.scheduledServices.Delete(id) - bslog.Debug("removed service", slog.Any("service", svc)) + err := sm.svcRepo.Delete(svc.MemberOf, svc.GetID()) + if err != nil { + return fmt.Errorf("failed to delete service: %w", err) + } + bslog.Debug("removed service", slog.Any("service", svc)) return nil } @@ -172,29 +244,7 @@ func (sm *ServicesManager) updateService(old, new *service.Service) { sm.mutex.Unlock() if oldMemberOf != newMemberOf { - sm.mutex.Lock() - newGroup, newOk := sm.serviceGroups[newMemberOf] - if !newOk { - newGroup = sm.newServiceGroup(newMemberOf) - } - - oldGroup, oldOk := sm.serviceGroups[oldMemberOf] - sm.mutex.Unlock() - - newGroup.RegisterService(old) - var empty bool - if oldOk { - empty = oldGroup.RemoveService(old.GetID()) - - } - if empty { // delete empty service group - delete(sm.serviceGroups, oldMemberOf) - } - bslog.Debug( - "updated service group membership", - slog.String("oldGroup", oldMemberOf), - slog.String("newGroup", newMemberOf), - ) + sm.memberOfChanged(oldMemberOf, newMemberOf, old) } else { sm.mutex.RLock() oldGroup, ok := sm.serviceGroups[oldMemberOf] @@ -202,15 +252,22 @@ func (sm *ServicesManager) updateService(old, new *service.Service) { if ok { oldGroup.Update() // notify potential changes to group } else { // this will probably never run, but you never know in concurrency! 
- sm.mutex.Lock() - delete(sm.serviceGroups, oldMemberOf) - sm.mutex.Unlock() + sm.deleteGroup(oldMemberOf) } } sm.mutex.Lock() defer sm.mutex.Unlock() + err := sm.svcRepo.Update(old.GSLBService()) + if err != nil { + bslog.Error( + "failed to update service config persistently", + slog.String("reason", err.Error()), + slog.Any("service", old), + ) + } + // important that this checked AFTER the service groups have ran their update // this is because the group may trigger a promotion event that needs to be handled first // if the promotion event does not happen, we just simply move it to a new interval @@ -223,6 +280,56 @@ func (sm *ServicesManager) updateService(old, new *service.Service) { bslog.Debug("updated service", slog.Any("service", old)) } +func (sm *ServicesManager) memberOfChanged(oldMemberOf, newMemberOf string, svc *service.Service) { + sm.mutex.Lock() + + err := sm.svcRepo.Delete(oldMemberOf, svc.GetID()) + if err != nil { + bslog.Error( + "failed to remove service from old service group", + slog.String("reason", err.Error()), + slog.String("oldMemberOf", oldMemberOf), + slog.Any("service", svc), + ) + return + } + + err = sm.svcRepo.Create(svc.GSLBService()) + if err != nil { + bslog.Error( + "failed to add service to new group", + slog.String("reason", err.Error()), + slog.String("newMemberOf", newMemberOf), + slog.Any("service", svc), + ) + return + } + + newGroup, newOk := sm.serviceGroups[newMemberOf] + if !newOk { + newGroup = sm.newServiceGroup(newMemberOf) + } + + oldGroup, oldOk := sm.serviceGroups[oldMemberOf] + sm.mutex.Unlock() + + newGroup.RegisterService(svc) + + var empty bool + if oldOk { + empty = oldGroup.RemoveService(svc.GetID()) + + } + if empty { // delete empty service group + sm.deleteGroup(oldMemberOf) + } + bslog.Debug( + "updated service group membership", + slog.String("oldGroup", oldMemberOf), + slog.String("newGroup", newMemberOf), + ) +} + // re-schedules the relevant services in the PromotionEvent func (sm 
*ServicesManager) handlePromotion(event *PromotionEvent) { sm.mutex.Lock() @@ -263,6 +370,22 @@ func (sm *ServicesManager) handlePromotion(event *PromotionEvent) { if event.OldActive != nil && event.NewActive != nil { // just swap, and do dns updates demotedInterval = event.NewActive.ScheduledInterval + oldActiveGSLBService := event.OldActive.GSLBService() + oldActiveGSLBService.IsActive = false + err := sm.svcRepo.Update(oldActiveGSLBService) + if err != nil { + bslog.Error("failed to remove active flag from service", slog.Any("oldActive", event.OldActive)) + return + } + + newActiveGSLBService := event.NewActive.GSLBService() + newActiveGSLBService.IsActive = true + err = sm.svcRepo.Update(newActiveGSLBService) + if err != nil { + bslog.Error("failed to update active flag on service", slog.Any("newActive", event.NewActive)) + return + } + bslog.Warn("demoting service", slog.Any("oldActive", event.OldActive), slog.Group("intervalChange", @@ -270,6 +393,7 @@ func (sm *ServicesManager) handlePromotion(event *PromotionEvent) { slog.String("to", demotedInterval.String()), )) sm.moveServiceToInterval(event.OldActive, demotedInterval) + sm.DNSUpdate(event.OldActive, false) bslog.Warn("promoting service", @@ -284,13 +408,30 @@ func (sm *ServicesManager) handlePromotion(event *PromotionEvent) { } if event.NewActive != nil { // first service to come up when all services are down + newActiveGSLBService := event.NewActive.GSLBService() + newActiveGSLBService.IsActive = true + err := sm.svcRepo.Update(newActiveGSLBService) + if err != nil { + bslog.Error("failed to update active flag on service", slog.Any("newActive", event.NewActive)) + return + } bslog.Info("new active service", slog.Any("service", event.NewActive)) sm.moveServiceToInterval(event.NewActive, baseInterval) + if sm.DNSUpdate == nil { + bslog.Fatal("DNSUpdate is nil!!!!") + } sm.DNSUpdate(event.NewActive, true) return } if event.OldActive != nil { // no service to take over + oldActiveGSLBService := 
event.OldActive.GSLBService() + oldActiveGSLBService.IsActive = false + err := sm.svcRepo.Update(oldActiveGSLBService) + if err != nil { + bslog.Error("failed to remove active flag from service", slog.Any("oldActive", event.OldActive)) + return + } bslog.Warn("no available sites", slog.String("serviceGroup", event.Service)) sm.DNSUpdate(event.OldActive, false) return @@ -298,14 +439,24 @@ func (sm *ServicesManager) handlePromotion(event *PromotionEvent) { } func (sm *ServicesManager) newServiceGroup(memberOf string) *ServiceGroup { - newGroup := NewEmptyServiceGroup() + newGroup := NewEmptyServiceGroup(memberOf) newGroup.OnPromotion = func(event *PromotionEvent) { sm.handlePromotion(event) } sm.serviceGroups[memberOf] = newGroup + + serviceGroups.Inc() return newGroup } +// only called when we know it is safe to delete a group +func (sm *ServicesManager) deleteGroup(memberOf string) { + sm.mutex.Lock() + delete(sm.serviceGroups, memberOf) + sm.mutex.Unlock() + serviceGroups.Dec() +} + // creates a new scheduler, and starts its loop func (sm *ServicesManager) newScheduler(interval timesutil.Duration) *scheduler.Scheduler { if scheduler, ok := sm.schedulers[interval]; ok { // scheduler already exists @@ -315,8 +466,8 @@ func (sm *ServicesManager) newScheduler(interval timesutil.Duration) *scheduler. 
scheduler := scheduler.NewScheduler(time.Duration(interval), sm.wg) sm.schedulers[interval] = scheduler - scheduler.OnTick = func(s *service.Service) { - err := sm.pool.Put(s) + scheduler.OnTick = func(svc *service.Service) { + err := sm.pool.Put(healthcheck.NewJob(svc)) if errors.Is(err, pool.ErrPutOnClosedPool) { bslog.Error("failed to schedule health check", slog.String("reason", err.Error())) } @@ -350,6 +501,7 @@ func (sm *ServicesManager) moveServiceToInterval(svc *service.Service, newInterv if newScheduler == nil { newScheduler = sm.newScheduler(newInterval) } + newScheduler.ScheduleService(svc) bslog.Debug("sucessfully moved service to new interval", slog.String("oldInterval", oldInterval.String()), @@ -357,7 +509,7 @@ func (sm *ServicesManager) moveServiceToInterval(svc *service.Service, newInterv slog.Any("service", svc)) } -func (sm *ServicesManager) GetActiveForFQDN(memberOf string) *service.Service { +func (sm *ServicesManager) GetActiveForMemberOf(memberOf string) *service.Service { sm.mutex.RLock() defer sm.mutex.RUnlock() if group, ok := sm.serviceGroups[memberOf]; ok { @@ -379,3 +531,30 @@ func (sm *ServicesManager) Failover(fqdn string, failover failover.Failover) err return nil } + +func (sm *ServicesManager) BuildServiceOptions(config model.GSLBConfig) []service.ServiceOption { + opts := make([]service.ServiceOption, 0, 5) + opts = append(opts, service.WithDryRunChecks(sm.dryrun)) + + gslbService, err := sm.svcRepo.GetMemberInGroup(config.MemberOf, config.ServiceID) + if err != nil { + if errors.Is(err, svcRepo.ErrServiceInGroupNotFound) { + bslog.Debug("could not find member in group", + slog.String("group", config.MemberOf), + slog.String("member", config.ServiceID), + ) + } + // max out the failure count + // means a long time before service will be considered healthy + opts = append(opts, service.WithFailureCount(config.FailureThreshold)) + + return opts + } + + opts = append(opts, service.WithFailureCount(gslbService.FailureCount)) + if 
gslbService.IsHealthy { + opts = append(opts, service.WithHealthy()) + } + + return opts +} diff --git a/internal/manager/manager_test.go b/internal/manager/manager_test.go index b9e8e71..18321af 100644 --- a/internal/manager/manager_test.go +++ b/internal/manager/manager_test.go @@ -16,7 +16,7 @@ var genericGSLBConfig = model.GSLBConfig{ Ip: "192.168.1.1", Port: "80", Datacenter: "dc1", - Interval: timesutil.Duration(5 * time.Second), + Interval: timesutil.Duration(30 * time.Second), Priority: 1, CheckType: "TCP-FULL", } @@ -96,7 +96,7 @@ func TestStartAndStop(t *testing.T) { } } -func TestServicesManager_updateServiceUnlocked(t *testing.T) { +func TestServicesManager_updateService(t *testing.T) { tests := []struct { name string // description of this test case old model.GSLBConfig @@ -112,7 +112,7 @@ func TestServicesManager_updateServiceUnlocked(t *testing.T) { Ip: "192.168.1.1", Port: "80", Datacenter: "dc1", - Interval: timesutil.Duration(5 * time.Second), + Interval: timesutil.Duration(30 * time.Second), Priority: 2, CheckType: "TCP-FULL", }, @@ -127,7 +127,7 @@ func TestServicesManager_updateServiceUnlocked(t *testing.T) { Ip: "192.168.1.1", Port: "80", Datacenter: "dc2", - Interval: timesutil.Duration(5 * time.Second), + Interval: timesutil.Duration(30 * time.Second), Priority: 1, CheckType: "TCP-FULL", }, @@ -142,7 +142,7 @@ func TestServicesManager_updateServiceUnlocked(t *testing.T) { Ip: "192.168.1.2", Port: "80", Datacenter: "dc1", - Interval: timesutil.Duration(5 * time.Second), + Interval: timesutil.Duration(30 * time.Second), Priority: 1, CheckType: "TCP-FULL", }, @@ -157,7 +157,7 @@ func TestServicesManager_updateServiceUnlocked(t *testing.T) { Ip: "192.168.1.1", Port: "80", Datacenter: "dc1", - Interval: timesutil.Duration(5 * time.Second), + Interval: timesutil.Duration(30 * time.Second), Priority: 1, CheckType: "TCP-HALF", }, @@ -172,7 +172,7 @@ func TestServicesManager_updateServiceUnlocked(t *testing.T) { Ip: "192.168.1.1", Port: "80", 
Datacenter: "dc1", - Interval: timesutil.Duration(5 * time.Second), + Interval: timesutil.Duration(30 * time.Second), Priority: 1, CheckType: "TCP-FULL", }, @@ -182,12 +182,12 @@ func TestServicesManager_updateServiceUnlocked(t *testing.T) { old: genericGSLBConfig, new: model.GSLBConfig{ ServiceID: "123-test-456", - MemberOf: "example.example.com", + MemberOf: "example.com", Fqdn: "testing.example.com", Ip: "192.168.1.1", Port: "80", Datacenter: "dc1", - Interval: timesutil.Duration(5 * time.Second), + Interval: timesutil.Duration(30 * time.Second), Priority: 1, CheckType: "TCP-FULL", }, @@ -197,6 +197,7 @@ func TestServicesManager_updateServiceUnlocked(t *testing.T) { t.Run(tt.name, func(t *testing.T) { sm := NewManager(WithDryRun(true)) sm.Start() + defer sm.Stop() sm.DNSUpdate = func(s *service.Service, b bool) { @@ -206,19 +207,23 @@ func TestServicesManager_updateServiceUnlocked(t *testing.T) { t.Fatalf("could not create service during testing: %s", err.Error()) } - new, err := service.NewServiceFromGSLBConfig(tt.new, true) + new, err := service.NewServiceFromGSLBConfig(tt.new, service.WithDryRunChecks(true)) if err != nil { t.Fatalf("could not create service during testing: %s", err.Error()) } sm.updateService(old, new) if old.ConfigChanged(new) { - t.Error("still pending config changes after update") + t.Fatal("still pending config changes after update") } _, interval, svc := sm.scheduledServices.Search(old.GetID()) if interval != new.GetDefaultInterval() { - t.Errorf("the service was not located at its correct interval, expected: %s but got: %s", new.GetDefaultInterval(), interval) + t.Fatalf("the service was not located at its correct interval, expected: %s but got: %s", new.GetDefaultInterval(), interval) + } + + if ok := sm.serviceGroups[old.MemberOf].memberExists(old); !ok { + t.Fatalf("service does not exist in expected service group, expected: %s", old.MemberOf) } if svc != old { @@ -228,13 +233,14 @@ func TestServicesManager_updateServiceUnlocked(t 
*testing.T) { } } + func TestServicesManager_moveServiceToInterval(t *testing.T) { tests := []struct { name string // description of this test case // Named input parameters for target function. config model.GSLBConfig newInterval timesutil.Duration - shouldExist bool + shouldExist bool }{ { name: "change-to-non-existing-interval", @@ -245,19 +251,21 @@ func TestServicesManager_moveServiceToInterval(t *testing.T) { name: "change-to-existing-interval", config: genericGSLBConfig, newInterval: timesutil.FromDuration(time.Second), - shouldExist: true, + shouldExist: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { sm := NewManager(WithDryRun(true)) + sm.Start() + defer sm.Stop() svc, _ := sm.RegisterService(tt.config) if tt.shouldExist { sm.newScheduler(tt.newInterval) } sm.moveServiceToInterval(svc, tt.newInterval) - + _, interval, _ := sm.scheduledServices.Search(svc.GetID()) if interval != tt.newInterval { t.Errorf("expected new interval: %s but got: %s", tt.newInterval.String(), interval.String()) @@ -270,3 +278,4 @@ func TestServicesManager_moveServiceToInterval(t *testing.T) { }) } } + diff --git a/internal/manager/metrics.go b/internal/manager/metrics.go new file mode 100644 index 0000000..233e1ce --- /dev/null +++ b/internal/manager/metrics.go @@ -0,0 +1,26 @@ +package manager + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var ( + workerPoolSize = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "worker_pool_size_total", + Help: "Number of running workers that perform health checks", + }) + + serviceGroups = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "service_groups_total", + Help: "Number of service groups", + }) + + serviceGroupMembers = promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "service_group_members", + Help: "Number of members in each service group", + }, + []string{"memberOf"}, + ) +) diff --git a/internal/manager/options.go 
b/internal/manager/options.go index 1c1c106..068d8b1 100644 --- a/internal/manager/options.go +++ b/internal/manager/options.go @@ -1,9 +1,12 @@ package manager +import "github.com/vitistack/gslb-operator/internal/repositories/service" + type managerConfig struct { MinRunningWorkers uint NonBlockingBufferSize uint DryRun bool + repo *service.ServiceRepo } type serviceManagerOption func(cfg *managerConfig) @@ -25,3 +28,9 @@ func WithDryRun(enabled bool) serviceManagerOption { cfg.DryRun = enabled } } + +func WithServiceRepository(repo *service.ServiceRepo) serviceManagerOption { + return func(cfg *managerConfig) { + cfg.repo = repo + } +} diff --git a/internal/manager/query_manager.go b/internal/manager/query_manager.go index 357b889..7c11f65 100644 --- a/internal/manager/query_manager.go +++ b/internal/manager/query_manager.go @@ -8,8 +8,8 @@ import ( // interface for API handlers that needs specific functionality from the manager. // without exposing all functionality type QueryManager interface { - GetActiveForFQDN(fqdn string) *service.Service - + GetActiveForMemberOf(memberOf string) *service.Service + //write operations Failover(fqdn string, failover failover.Failover) error } diff --git a/internal/manager/scheduler/scheduler.go b/internal/manager/scheduler/scheduler.go index 731f8dd..545af82 100644 --- a/internal/manager/scheduler/scheduler.go +++ b/internal/manager/scheduler/scheduler.go @@ -2,11 +2,13 @@ package scheduler import ( "container/heap" + "log/slog" "math/rand/v2" "sync" "time" "github.com/vitistack/gslb-operator/internal/service" + "github.com/vitistack/gslb-operator/pkg/bslog" ) const OFFSETS_PER_SECOND = 2 @@ -39,7 +41,7 @@ type Scheduler struct { // random jitter to spread out scheduled service on interval and sub-tick jitterRange time.Duration - stop chan struct{} + stop chan struct{} // signal stop wg *sync.WaitGroup mu sync.Mutex @@ -93,12 +95,15 @@ func (s *Scheduler) ScheduleService(svc *service.Service) { func (s *Scheduler) 
RemoveService(svc *service.Service) bool { s.mu.Lock() defer s.mu.Unlock() - idx := s.heap.GetServiceIndex(svc) + + idx := s.heap.GetServiceIndex(svc.GetID()) if idx == -1 { return s.heap.Len() == 0 - } - if idx == 0 { + } else if idx == 0 { s.heap[0].shouldReSchedule = false + if len(s.heap) == 1 { + return true + } } else { heap.Remove(&s.heap, idx) } @@ -140,27 +145,52 @@ func (s *Scheduler) loop() { s.mu.Lock() s.isRunning = false s.mu.Unlock() + bslog.Debug("scheduler closed", slog.String("interval", s.interval.String())) }() for { + select { + case <-s.stop: // check stop + bslog.Debug("got stop, exiting scheduler...") + return + default: + } + s.mu.Lock() if s.heap.Len() == 0 { // no need to infinitly run on an empty queue s.mu.Unlock() - break + return } next := s.heap.Peek() s.mu.Unlock() if next.nextCheckTime.Before(time.Now()) { // check time already past, do action immediately and reschedule s.OnTick(next.service) + + select { + case <-s.stop: // check stop + bslog.Debug("got stop, exiting scheduler...") + return + default: + } + s.reSchedule() } else { timeUntil := time.Until(next.nextCheckTime) select { case <-s.stop: + bslog.Debug("got stop, exiting scheduler...") return case <-time.After(timeUntil): s.OnTick(next.service) + + select { + case <-s.stop: // check stop + bslog.Debug("got stop, exiting scheduler...") + return + default: + } + s.reSchedule() } } diff --git a/internal/manager/scheduler/schedulerHeap.go b/internal/manager/scheduler/schedulerHeap.go index fc66f6f..4907dfc 100644 --- a/internal/manager/scheduler/schedulerHeap.go +++ b/internal/manager/scheduler/schedulerHeap.go @@ -1,7 +1,7 @@ package scheduler import ( - "github.com/vitistack/gslb-operator/internal/service" + "slices" ) type ServiceHeap []*ScheduledService @@ -38,11 +38,8 @@ func (h ServiceHeap) Peek() *ScheduledService { return h[0] } -func (h *ServiceHeap) GetServiceIndex(service *service.Service) int { - for index, scheduled := range *h { - if scheduled.service.GetID() == 
service.GetID() { - return index - } - } - return -1 +func (h *ServiceHeap) GetServiceIndex(id string) int { + return slices.IndexFunc(*h, func(s *ScheduledService) bool { + return s.service.GetID() == id + }) } diff --git a/internal/manager/scheduler/scheduler_test.go b/internal/manager/scheduler/scheduler_test.go index b0927dc..1e29184 100644 --- a/internal/manager/scheduler/scheduler_test.go +++ b/internal/manager/scheduler/scheduler_test.go @@ -1,8 +1,6 @@ package scheduler import ( - "fmt" - "math/rand" "sync" "testing" "time" @@ -13,10 +11,21 @@ import ( ) var genericGSLBConfig = model.GSLBConfig{ + ServiceID: "123", Ip: "192.168.1.1", Port: "80", Datacenter: "dc1", - Interval: timesutil.Duration(5 * time.Second), + Interval: timesutil.Duration(time.Second), + Priority: 1, + CheckType: "TCP-FULL", +} + +var genericGSLBConfig2 = model.GSLBConfig{ + ServiceID: "456", + Ip: "192.168.1.2", + Port: "80", + Datacenter: "dc2", + Interval: timesutil.Duration(time.Second), Priority: 1, CheckType: "TCP-FULL", } @@ -62,63 +71,96 @@ func TestNewScheduler(t *testing.T) { } } -func TestScheduler_Loop(t *testing.T) { +func TestScheduleService(t *testing.T) { + svc, err := service.NewServiceFromGSLBConfig(genericGSLBConfig) + if err != nil { + t.Fatalf("could not create test service: %s", err.Error()) + } + + receivedTick := false + + wg := sync.WaitGroup{} + scheduler := NewScheduler(time.Duration(svc.GetDefaultInterval()), &wg) + scheduler.OnTick = func(s *service.Service) { + receivedTick = true + } + defer scheduler.Stop() + + scheduler.ScheduleService(svc) + if !scheduler.isRunning { + t.Errorf("scheduler is not running, expected: isRunning == true, but got: isRunning == false") + } + + if len(scheduler.heap) == 0 && !receivedTick { + t.Errorf("scheduler is running, but heap size is 0, means scheduler has pop'ed the heap before received tick") + } + +} + +func TestScheduler_RemoveService(t *testing.T) { + svc1, _ := service.NewServiceFromGSLBConfig(genericGSLBConfig) + 
svc2, _ := service.NewServiceFromGSLBConfig(genericGSLBConfig2) tests := []struct { name string // description of this test case // Named input parameters for receiver constructor. interval time.Duration + wg *sync.WaitGroup + // Named input parameters for target function. + svc *service.Service + want bool + addSecond bool + removeSecond bool }{ { - name: "100-services-on-5s", - interval: time.Second * 5, + name: "only-one", + interval: time.Second, + wg: &sync.WaitGroup{}, + svc: svc1, + want: true, + addSecond: false, + removeSecond: false, }, { - name: "100-services-on-15s", - interval: time.Second * 15, + name: "add-second-remove-first", + interval: time.Second, + wg: &sync.WaitGroup{}, + svc: svc1, + want: false, + addSecond: true, + removeSecond: false, }, { - name: "100-services-on-45s", - interval: time.Second * 45, + name: "add-second-remove-second", + interval: time.Second, + wg: &sync.WaitGroup{}, + svc: svc1, + want: false, + addSecond: true, + removeSecond: true, }, - { - name: "100-services-on-60s", - interval: time.Second * 60, - }, - } - - numServices := 100 - urls := randomUrlIDs(numServices) - - services := make([]*service.Service, 0, 100) - - for idx := range numServices { - genericGSLBConfig.Fqdn = urls[idx] - svc, _ := service.NewServiceFromGSLBConfig(genericGSLBConfig, true) - services = append(services, svc) } - for _, tt := range tests { - scheduler := NewScheduler(tt.interval, &sync.WaitGroup{}) - scheduler.OnTick = func(s *service.Service) { - t.Logf("received tick for: %s\n", s.Fqdn) - } - - for _, svc := range services { - scheduler.ScheduleService(svc) - } - time.Sleep(time.Second * 6) - } -} + t.Run(tt.name, func(t *testing.T) { + s := NewScheduler(tt.interval, tt.wg) -func randomUrlIDs(num int) []string { - baseUrl := "test.example.com" - urls := make([]string, 0, num) + s.ScheduleService(tt.svc) + var got bool + if tt.addSecond { + s.ScheduleService(svc2) + } - const charSet = "abcdefghijklmnopqrstuvwxyz" - for range num { - idx := 
rand.Intn(len(charSet)) - urls = append(urls, fmt.Sprintf("%v/%v", baseUrl, charSet[idx])) - } + if tt.removeSecond { + got = s.RemoveService(svc2) + } else { + got = s.RemoveService(tt.svc) + if s.heap.Peek().shouldReSchedule { + t.Errorf("scheduled service are set to be rescheduled after remove has been called") + } + } - return urls + if got != tt.want { + t.Errorf("RemoveService() = %v, but wanted %v", got, tt.want) + } + }) + } } diff --git a/internal/manager/servicegroup.go b/internal/manager/servicegroup.go index 7d85a65..baeed47 100644 --- a/internal/manager/servicegroup.go +++ b/internal/manager/servicegroup.go @@ -6,9 +6,7 @@ import ( "slices" "sync" - "github.com/vitistack/gslb-operator/internal/config" "github.com/vitistack/gslb-operator/internal/service" - "github.com/vitistack/gslb-operator/internal/utils" "github.com/vitistack/gslb-operator/pkg/bslog" "github.com/vitistack/gslb-operator/pkg/models/failover" ) @@ -43,6 +41,7 @@ type PromotionEvent struct { } type ServiceGroup struct { + Name string mode ServiceGroupMode // sorted by priority. 
@@ -63,15 +62,14 @@ type ServiceGroup struct { mu sync.RWMutex } -func NewEmptyServiceGroup() *ServiceGroup { - datacenter := config.GetInstance().Server().Datacenter() +func NewEmptyServiceGroup(name string) *ServiceGroup { return &ServiceGroup{ - mode: ActiveActive, - Members: make([]*service.Service, 0), - active: nil, - lastActive: nil, - prioritizedDatacenter: datacenter, - mu: sync.RWMutex{}, + Name: name, + mode: ActiveActive, + Members: make([]*service.Service, 0), + active: nil, + lastActive: nil, + mu: sync.RWMutex{}, } } @@ -112,28 +110,30 @@ func (sg *ServiceGroup) firstHealthy() *service.Service { func (sg *ServiceGroup) OnServiceHealthChange(changedService *service.Service, healthy bool) { sg.mu.Lock() - defer sg.mu.Unlock() oldActive := sg.active if oldActive == nil { oldActive = sg.lastActive } + switch sg.mode { case ActivePassive: if !healthy && sg.active.GetID() == changedService.GetID() { // active has gone down! sg.lastActive = sg.active + sg.mu.Unlock() sg.OnPromotion(sg.promoteNextHealthy()) return } if healthy && sg.triggerPromotion(changedService) { event := &PromotionEvent{ - Service: changedService.Fqdn, + Service: sg.Name, OldActive: oldActive, NewActive: changedService, } sg.lastActive = sg.active sg.active = changedService + sg.mu.Unlock() sg.OnPromotion(event) } @@ -141,8 +141,9 @@ func (sg *ServiceGroup) OnServiceHealthChange(changedService *service.Service, h if healthy { // If prioritized DC service becomes healthy, it must become active (single DNS record). if changedService.Datacenter == sg.prioritizedDatacenter && changedService != sg.active { + sg.mu.Unlock() sg.OnPromotion(&PromotionEvent{ - Service: changedService.Fqdn, + Service: sg.Name, NewActive: changedService, OldActive: sg.active, }) @@ -151,8 +152,9 @@ func (sg *ServiceGroup) OnServiceHealthChange(changedService *service.Service, h } // If there is no active or the current active is unhealthy, promote this healthy service. 
if sg.active == nil || !sg.active.IsHealthy() { + sg.mu.Unlock() sg.OnPromotion(&PromotionEvent{ - Service: changedService.Fqdn, + Service: sg.Name, NewActive: changedService, OldActive: sg.active, }) @@ -164,10 +166,11 @@ func (sg *ServiceGroup) OnServiceHealthChange(changedService *service.Service, h // unhealthy if changedService.GetID() == sg.active.GetID() { + sg.mu.Unlock() next := sg.firstHealthy() if next != nil { sg.OnPromotion(&PromotionEvent{ - Service: changedService.Fqdn, + Service: sg.Name, NewActive: next, OldActive: sg.active, }) @@ -178,12 +181,13 @@ func (sg *ServiceGroup) OnServiceHealthChange(changedService *service.Service, h // all down -> signal DNS delete (single-record) sg.OnPromotion(&PromotionEvent{ - Service: changedService.Fqdn, + Service: sg.Name, NewActive: nil, OldActive: sg.active, }) sg.lastActive = sg.active sg.active = nil + return } } } @@ -203,19 +207,25 @@ func (sg *ServiceGroup) RegisterService(newService *service.Service) { sg.mu.Unlock() sg.Update() + serviceGroupMembers.WithLabelValues(newService.MemberOf).Inc() } func (sg *ServiceGroup) RemoveService(id string) bool { sg.mu.Lock() - defer sg.mu.Unlock() + members := sg.Members + sg.mu.Unlock() - for idx, member := range sg.Members { - if member.GetID() == id { - sg.Members = utils.RemoveIndexFromSlice(sg.Members, idx) - sg.Update() - break - } + idx := slices.IndexFunc(members, func(s *service.Service) bool { + return s.GetID() == id + }) + if idx != -1 { + sg.mu.Lock() + sg.Members = append(members[:idx], members[idx+1:]...) 
+ sg.mu.Unlock() + sg.Update() + serviceGroupMembers.WithLabelValues(sg.Name).Dec() } + return len(sg.Members) == 0 } @@ -240,7 +250,7 @@ func (sg *ServiceGroup) promoteNextHealthy() *PromotionEvent { if bestIdx != -1 { sg.active = sg.Members[bestIdx] return &PromotionEvent{ - Service: oldActive.Fqdn, + Service: sg.Name, NewActive: sg.active, OldActive: oldActive, } @@ -249,7 +259,7 @@ func (sg *ServiceGroup) promoteNextHealthy() *PromotionEvent { // No healthy services: signal DNS delete (NewActive=nil) sg.active = nil return &PromotionEvent{ - Service: oldActive.Fqdn, + Service: sg.Name, NewActive: nil, OldActive: oldActive, } @@ -301,21 +311,20 @@ func (sg *ServiceGroup) SetGroupMode() { } sg.mu.RUnlock() + sg.mu.Lock() + defer sg.mu.Unlock() + switch sg.mode { case ActiveActive: // If services have different priorities, switch to ActivePassive if !allSamePriority { - sg.mu.Lock() sg.mode = ActivePassive - sg.mu.Unlock() } case ActivePassive: // If all services have same priority, can switch to ActiveActive if allSamePriority { - sg.mu.Lock() sg.mode = ActiveActive - sg.mu.Unlock() // if none healthy, leave active nil } @@ -327,9 +336,7 @@ func (sg *ServiceGroup) SetGroupMode() { */ default: - sg.mu.Lock() sg.mode = ActiveActive - sg.mu.Unlock() } bslog.Debug("servicegroup mode set", slog.Any("mode", sg.mode.String())) } @@ -381,23 +388,7 @@ func (sg *ServiceGroup) Update() { sg.mu.RUnlock() sg.mu.Lock() - slices.SortFunc(sg.Members, func(a, b *service.Service) int { - aPriority := a.GetPriority() - bPriority := b.GetPriority() - - if aPriority != bPriority { - return cmp.Compare(aPriority, bPriority) - } - - // equal priority - prioritized datacenter decides (ActiveActive tie-break) - if a.Datacenter == sg.prioritizedDatacenter { - return -1 - } else if b.Datacenter == sg.prioritizedDatacenter { - return 1 - } - - return 0 - }) + slices.SortFunc(sg.Members, sortMembersFunc) sg.mu.Unlock() sg.SetGroupMode() @@ -410,10 +401,35 @@ func (sg *ServiceGroup) Update() 
{ sg.active = firstHealthy event := &PromotionEvent{ - Service: firstHealthy.MemberOf, + Service: sg.Name, OldActive: sg.lastActive, NewActive: sg.active, } sg.OnPromotion(event) } } + +// func passed into slices.SortFunc for sorting the groups members +func sortMembersFunc(a, b *service.Service) int { + aPriority := a.GetPriority() + bPriority := b.GetPriority() + + if aPriority != bPriority { + return cmp.Compare(aPriority, bPriority) + } + + aRoundtrip := a.GetAverageRoundtrip() + bRoundtrip := b.GetAverageRoundtrip() + + // handle case where no roundtrip time has been recorded + aHasRoundtrip := aRoundtrip > 0 + bHasRoundtrip := bRoundtrip > 0 + + if aHasRoundtrip && bHasRoundtrip { + return cmp.Compare(aRoundtrip, bRoundtrip) + } else if aHasRoundtrip && !bHasRoundtrip { // prioritize the one who has recorded data + return -1 + } else { + return 1 + } +} diff --git a/internal/manager/servicegroup_test.go b/internal/manager/servicegroup_test.go index ef65a34..edee0aa 100644 --- a/internal/manager/servicegroup_test.go +++ b/internal/manager/servicegroup_test.go @@ -16,36 +16,38 @@ type Test struct { } var activeConfig = model.GSLBConfig{ + ServiceID: "123", Fqdn: "test.example.com", Ip: "192.168.1.1", Port: "80", Datacenter: "dc1", Interval: timesutil.Duration(5 * time.Second), Priority: 1, - CheckType: "TCP-FULL", + CheckType: "TCP-FULL", } var passiveConfig = model.GSLBConfig{ + ServiceID: "456", Fqdn: "test.example.com", Ip: "192.168.1.1", Port: "80", Datacenter: "dc2", Interval: timesutil.Duration(5 * time.Second), Priority: 2, - CheckType: "TCP-FULL", + CheckType: "TCP-FULL", } var active *service.Service var passive *service.Service func TestMain(m *testing.M) { - active, _ = service.NewServiceFromGSLBConfig(activeConfig, true) - passive, _ = service.NewServiceFromGSLBConfig(passiveConfig, true) + active, _ = service.NewServiceFromGSLBConfig(activeConfig, service.WithDryRunChecks(true)) + passive, _ = service.NewServiceFromGSLBConfig(passiveConfig, 
service.WithDryRunChecks(true)) m.Run() } func TestServiceGroup_RegisterService(t *testing.T) { - group := NewEmptyServiceGroup() + group := NewEmptyServiceGroup("test") group.OnPromotion = func(pe *PromotionEvent) { log.Println("got promotion") if pe != nil { @@ -66,14 +68,14 @@ func TestServiceGroup_RegisterService(t *testing.T) { t.Errorf("Expected group mode: %v, but got: %v, after two services with different priorities registered", ActivePassive, group.mode) } /* - if group.active != 0 { - t.Errorf("Expected activeIndex: %v, but got: %v", 0, group.activeIndex) - } - */ + if group.active != 0 { + t.Errorf("Expected activeIndex: %v, but got: %v", 0, group.activeIndex) + } + */ } func TestServiceGroup_OnServiceHealthChange(t *testing.T) { - group := NewEmptyServiceGroup() + group := NewEmptyServiceGroup("test") group.RegisterService(active) group.OnPromotion = func(pe *PromotionEvent) { @@ -140,7 +142,6 @@ func TestServiceGroup_OnServiceHealthChange(t *testing.T) { } makeServiceHealthy(active) - } func makeServiceHealthy(service *service.Service) { @@ -166,20 +167,20 @@ func TestServiceGroup_memberExists(t *testing.T) { { name: "exists", member: &service.Service{ - Fqdn: "test.example.com", + Fqdn: "test.example.com", Datacenter: "JK", }, want: true, }, { - name: "does-not-exist", + name: "does-not-exist", member: &service.Service{}, - want: false, + want: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - sg := NewEmptyServiceGroup() + sg := NewEmptyServiceGroup("test") if tt.want { sg.RegisterService(tt.member) } diff --git a/internal/model/dnsdist.go b/internal/model/dnsdist.go new file mode 100644 index 0000000..5ed8d21 --- /dev/null +++ b/internal/model/dnsdist.go @@ -0,0 +1,10 @@ +package model + +import "net" + +type DNSDISTServer struct { + Name string `json:"name"` + Host net.IP `json:"host"` + Port string `json:"port"` + Key string `json:"key"` +} diff --git a/internal/model/service.go b/internal/model/service.go new file mode 
100644 index 0000000..ff2c996 --- /dev/null +++ b/internal/model/service.go @@ -0,0 +1,34 @@ +package model + +import ( + "github.com/vitistack/gslb-operator/pkg/models/spoofs" +) + +type GSLBServiceGroup []GSLBService + +// storage representation of service +// services that are configured with gslb config end up as a service.Service +type GSLBService struct { + ID string `json:"id"` + MemberOf string `json:"memberOf"` + Fqdn string `json:"fqdn"` + Datacenter string `json:"datacenter"` + IP string `json:"ip"` + IsHealthy bool `json:"isHealthy"` + FailureCount int `json:"failureCount"` + IsActive bool `json:"isActive"` + HasOverride bool `json:"hasOverride"` +} + +func (s GSLBService) Key() string { + return s.MemberOf +} + +// returns spoof representation of GSLBService +func (s GSLBService) Spoof() spoofs.Spoof { + return spoofs.Spoof{ + FQDN: s.MemberOf, + IP: s.IP, + DC: s.Datacenter, + } +} diff --git a/internal/repositories/service/service.go b/internal/repositories/service/service.go new file mode 100644 index 0000000..036c02c --- /dev/null +++ b/internal/repositories/service/service.go @@ -0,0 +1,238 @@ +package service + +import ( + "errors" + "fmt" + "slices" + + "github.com/vitistack/gslb-operator/internal/model" + "github.com/vitistack/gslb-operator/pkg/persistence" +) + +var ( + ErrServiceWithMemberOfNotFound = errors.New("service with member-of not found") + ErrServiceInGroupNotFound = errors.New("service in service-group not found") +) + +// repository for services that are considered active in a service group +type ServiceRepo struct { + store persistence.Store[model.GSLBServiceGroup] +} + +func NewServiceRepo(store persistence.Store[model.GSLBServiceGroup]) *ServiceRepo { + return &ServiceRepo{ + store: store, + } +} + +func (sr *ServiceRepo) Create(new *model.GSLBService) error { + override, err := sr.HasOverride(new.MemberOf) + if err != nil { + return err + } + + if override { + return nil + } + + group, err := sr.Read(new.MemberOf) + if err != 
nil { + return fmt.Errorf("failed to check for existing service group: %w", err) + } + + if group == nil { + group = make(model.GSLBServiceGroup, 0) + group = append(group, *new) + err := sr.store.Save(new.MemberOf, group) + if err != nil { + return fmt.Errorf("failed to store service: %w", err) + } + return nil + } + + if slices.ContainsFunc( + group, + func(s model.GSLBService) bool { + return s.ID == new.ID + }) { + //update instead + return sr.Update(new) + } + + group = append(group, *new) + err = sr.store.Save(new.Key(), group) + if err != nil { + return fmt.Errorf("failed to store service: %w", err) + } + + return nil +} + +func (sr *ServiceRepo) Update(new *model.GSLBService) error { + override, err := sr.HasOverride(new.MemberOf) + if err != nil { + return err + } + + group, err := sr.Read(new.MemberOf) + if err != nil { + return fmt.Errorf("failed to check for existing service group: %w", err) + } + + if len(group) == 0 { + return fmt.Errorf("failed to update service group: %s does not exist", new.MemberOf) + } + + idx := slices.IndexFunc(group, func(s model.GSLBService) bool { + return s.ID == new.ID + }) + + if idx == -1 { + return fmt.Errorf("%w: %s id: %s", ErrServiceInGroupNotFound, new.MemberOf, new.ID) + } + + if group[idx].IsActive && override { + new.IP = group[idx].IP + new.HasOverride = true + } + + group[idx] = *new + + if err := sr.store.Save(new.MemberOf, group); err != nil { + return fmt.Errorf("failed to update entry with id: %s: %w", new.MemberOf, err) + } + + return nil +} + +func (sr *ServiceRepo) UpdateOverride(ip string, service *model.GSLBService) error { + service.IP = ip + + group, err := sr.Read(service.MemberOf) + if err != nil { + return fmt.Errorf("failed to retrieve service group: %w", err) + } + + if len(group) == 0 { + return fmt.Errorf("failed to update service: service group for: %s does not exist", service.MemberOf) + } + + idx := slices.IndexFunc(group, func(s model.GSLBService) bool { + return s.ID == service.ID + }) + 
+ if idx == -1 { + return fmt.Errorf("%w: %s id: %s", ErrServiceInGroupNotFound, service.MemberOf, service.ID) + } + group[idx] = *service + if err := sr.store.Save(service.MemberOf, group); err != nil { + return fmt.Errorf("failed to update override: %w", err) + } + + return nil +} + +func (sr *ServiceRepo) RemoveOverrideFlag(memberOf string) error { + group, err := sr.Read(memberOf) + if err != nil { + return err + } + + for idx := range group { + group[idx].HasOverride = false // update flag for every service in group + } + + return sr.store.Save(memberOf, group) +} + +func (sr *ServiceRepo) Delete(memberOf string, id string) error { + group, err := sr.Read(memberOf) + if err != nil { + return err + } + + override, err := sr.HasOverride(memberOf) + if err != nil { + return err + } + + if override { + return nil + } + + group = slices.DeleteFunc(group, func(s model.GSLBService) bool { // delete service with id + return s.ID == id + }) + if len(group) == 0 { // delete service group if empty group + err = sr.store.Delete(memberOf) + if err != nil { + return fmt.Errorf("failed to delete service group after empty result: %w", err) + } + } + + err = sr.store.Save(memberOf, group) // save the remaining services + if err != nil { + return fmt.Errorf("failed to delete entry with id: %s: %w", id, err) + } + return nil +} + +func (sr *ServiceRepo) Read(id string) (model.GSLBServiceGroup, error) { + group, err := sr.store.Load(id) + if err != nil { + return nil, fmt.Errorf("failed to read from storage: %w", err) + } + return group, nil +} + +func (sr *ServiceRepo) ReadAll() ([]model.GSLBServiceGroup, error) { + services, err := sr.store.LoadAll() + if err != nil { + return nil, fmt.Errorf("failed to read from storage: %w", err) + } + + return services, nil +} + +func (sr *ServiceRepo) GetActive(memberOf string) (model.GSLBService, error) { + group, err := sr.Read(memberOf) + if err != nil { + return model.GSLBService{}, err + } + + for _, svc := range group { + if 
svc.IsActive { + return svc, nil + } + } + + return model.GSLBService{}, fmt.Errorf("%w: member-of %s", ErrServiceWithMemberOfNotFound, memberOf) +} + +func (sr *ServiceRepo) GetMemberInGroup(memberOf, memberId string) (model.GSLBService, error) { + group, err := sr.Read(memberOf) + if err != nil { + return model.GSLBService{}, err + } + + idx := slices.IndexFunc(group, func(s model.GSLBService) bool { + return s.ID == memberId + }) + if idx == -1 { + return model.GSLBService{}, fmt.Errorf("%w: member-of: %s: member-id: %s", ErrServiceInGroupNotFound, memberOf, memberId) + } + + return group[idx], nil +} + +func (sr *ServiceRepo) HasOverride(memberOf string) (bool, error) { + svc, err := sr.GetActive(memberOf) + if err != nil { + if errors.Is(err, ErrServiceWithMemberOfNotFound) { + return false, nil + } + return false, err + } + + return svc.HasOverride, nil +} diff --git a/internal/repositories/spoof/spoof.go b/internal/repositories/spoof/spoof.go index 7c10797..6a99003 100644 --- a/internal/repositories/spoof/spoof.go +++ b/internal/repositories/spoof/spoof.go @@ -1,87 +1,100 @@ package spoof import ( + "cmp" + "crypto/sha256" + "encoding/hex" + "encoding/json" "errors" "fmt" + "slices" + "github.com/vitistack/gslb-operator/internal/model" "github.com/vitistack/gslb-operator/pkg/models/spoofs" "github.com/vitistack/gslb-operator/pkg/persistence" ) var ( - ErrSpoofWithFQDNNotFound = errors.New("spoof with fqdn not found") + ErrSpoofInServiceGroupNotFound = errors.New("spoof in service group not found") ) -type Repository struct { - storage persistence.Store[spoofs.Spoof] +// read-only repo for spoofs +type SpoofRepo struct { + store persistence.Store[model.GSLBServiceGroup] } -func NewRepository(storage persistence.Store[spoofs.Spoof]) *Repository { - return &Repository{ - storage: storage, +func NewSpoofRepo(storage persistence.Store[model.GSLBServiceGroup]) *SpoofRepo { + return &SpoofRepo{ + store: storage, } } -func (r *Repository) Create(key string, new 
*spoofs.Spoof) error { - err := r.storage.Save(key, *new) +func (r *SpoofRepo) Read(id string) (spoofs.Spoof, error) { + group, err := r.store.Load(id) if err != nil { - return fmt.Errorf("unable to store entry: %s", err.Error()) + return spoofs.Spoof{}, fmt.Errorf("failed to read from storage: %w", err) } - return nil -} -func (r *Repository) Update(id string, new *spoofs.Spoof) error { - err := r.storage.Save(id, *new) - if err != nil { - return fmt.Errorf("unable to update entry with id: %s: %s", id, err.Error()) + for _, svc := range group { + if svc.IsActive { + return svc.Spoof(), nil + } } - return nil + + return spoofs.Spoof{}, nil } -func (r *Repository) Delete(id string) error { - err := r.storage.Delete(id) +func (r *SpoofRepo) ReadMemberOf(memberOf string) (spoofs.Spoof, error) { + group, err := r.store.Load(memberOf) if err != nil { - return fmt.Errorf("unable to delete entry with id: %s: %s", id, err.Error()) + return spoofs.Spoof{}, fmt.Errorf("failed to read from storage: %w", err) } - return nil -} -func (r *Repository) Read(id string) (spoofs.Spoof, error) { - spoof, err := r.storage.Load(id) - if err != nil { - return spoofs.Spoof{}, fmt.Errorf("unable to read resource with id: %s", err.Error()) + for _, svc := range group { + if svc.IsActive { + return svc.Spoof(), nil + } } - return spoof, nil + return spoofs.Spoof{}, fmt.Errorf("%w: fqdn: %s", ErrSpoofInServiceGroupNotFound, memberOf) } -func (r *Repository) ReadFQDN(fqdn string) (spoofs.Spoof, error) { - allSpoofs, err := r.storage.LoadAll() +func (r *SpoofRepo) ReadAll() ([]spoofs.Spoof, error) { + groups, err := r.store.LoadAll() if err != nil { - return spoofs.Spoof{}, fmt.Errorf("unable to read all spoofs: %w", err) + return nil, fmt.Errorf("failed to read from storage: %w", err) } - for _, spoof := range allSpoofs { - if spoof.FQDN == fqdn { - return spoof, nil + spoofs := make([]spoofs.Spoof, 0) + for _, group := range groups { + for _, svc := range group { + if svc.IsActive { + spoofs 
= append(spoofs, svc.Spoof()) + } } } - return spoofs.Spoof{}, fmt.Errorf("%w: fqdn: %s", ErrSpoofWithFQDNNotFound, fqdn) + return spoofs, nil } -func (r *Repository) ReadAll() ([]spoofs.Spoof, error) { - return r.storage.LoadAll() -} +func (r *SpoofRepo) Hash() (string, error) { + data, err := r.ReadAll() + if err != nil { + return "", err + } + + slices.SortFunc( + data, + func(a, b spoofs.Spoof) int { + return cmp.Compare(a.FQDN+":"+a.DC, b.FQDN+":"+b.DC) + }, + ) -func (r *Repository) HasOverride(fqdn string) (bool, error) { - spoof, err := r.ReadFQDN(fqdn) + marshalledSpoofs, err := json.Marshal(data) if err != nil { - if errors.Is(err, ErrSpoofWithFQDNNotFound) { - return false, nil - } - return false, err + return "", fmt.Errorf("unable to serialize spoofs: %w", err) } - return spoof.DC == "OVERRIDE", nil + rawHash := sha256.Sum256(marshalledSpoofs) // creating bytes representation of spoofs + return hex.EncodeToString(rawHash[:]), nil } diff --git a/internal/service/service.go b/internal/service/service.go index 02105d2..5ebf0d8 100644 --- a/internal/service/service.go +++ b/internal/service/service.go @@ -15,10 +15,11 @@ import ( const DEFAULT_FAILURE_THRESHOLD = 3 type HealthChangeCallback func(healthy bool) +type ServiceOption func(s *Service) type Service struct { id string - addr string + addr *net.TCPAddr Fqdn string MemberOf string Datacenter string @@ -31,9 +32,10 @@ type Service struct { checker checks.Checker healthChangeCallback HealthChangeCallback isHealthy bool + dryRun bool } -func NewServiceFromGSLBConfig(config model.GSLBConfig, dryRun bool) (*Service, error) { +func NewServiceFromGSLBConfig(config model.GSLBConfig, opts ...ServiceOption) (*Service, error) { ip := net.ParseIP(config.Ip) if ip == nil { return nil, ErrUnableToParseIpAddr @@ -51,7 +53,7 @@ func NewServiceFromGSLBConfig(config model.GSLBConfig, dryRun bool) (*Service, e interval := CalculateInterval(config.Priority, config.Interval) svc := &Service{ id: config.ServiceID, - 
addr: addr.String(), + addr: addr, Fqdn: config.Fqdn, MemberOf: config.MemberOf, Datacenter: config.Datacenter, @@ -60,12 +62,17 @@ func NewServiceFromGSLBConfig(config model.GSLBConfig, dryRun bool) (*Service, e defaultInterval: interval, priority: config.Priority, FailureThreshold: config.FailureThreshold, - failureCount: config.FailureThreshold, + failureCount: config.FailureThreshold, // need to succeed check N times before healthy! isHealthy: false, + dryRun: false, + } + + for _, opt := range opts { + opt(svc) } switch { - case dryRun: + case svc.dryRun: svc.checker = &checks.DryRun{} case config.CheckType == checks.HTTPS: @@ -75,18 +82,38 @@ func NewServiceFromGSLBConfig(config model.GSLBConfig, dryRun bool) (*Service, e svc.checker = checks.NewHTTPChecker("https://"+svc.Fqdn, checks.DEFAULT_TIMEOUT, config.Script) case config.CheckType == checks.TCP_FULL: - svc.checker = checks.NewTCPFullChecker(svc.addr, checks.DEFAULT_TIMEOUT) + svc.checker = checks.NewTCPFullChecker(svc.addr.String(), checks.DEFAULT_TIMEOUT) case config.CheckType == checks.TCP_HALF: - svc.checker = checks.NewTCPHalfChecker(svc.addr, checks.DEFAULT_TIMEOUT) + svc.checker = checks.NewTCPHalfChecker(svc.addr.String(), checks.DEFAULT_TIMEOUT) default: - svc.checker = checks.NewTCPFullChecker(svc.addr, checks.DEFAULT_TIMEOUT) + svc.checker = checks.NewTCPFullChecker(svc.addr.String(), checks.DEFAULT_TIMEOUT) } return svc, nil } +func WithDryRunChecks(enabled bool) ServiceOption { + return func(s *Service) { + s.dryRun = enabled + } +} + +func WithHealthy() ServiceOption { + return func(s *Service) { + s.isHealthy = true + } +} + +func WithFailureCount(count int) ServiceOption { + return func(s *Service) { + if count > -1 { + s.failureCount = count + } // default values are handled in the creation of the service! + } +} + // 5s, 15s, 45s, checks.MAX_CHECK_INTERVAL. // Exponential growth of duration based on priority. 
Up to checks.MAX_CHECK_INTERVAL func CalculateInterval(priority int, baseInterval timesutil.Duration) timesutil.Duration { @@ -209,12 +236,8 @@ func (s *Service) GetPriority() int { return s.priority } -func (s *Service) GetIP() (string, error) { - ip, _, err := net.SplitHostPort(s.addr) - if err != nil { - return "", fmt.Errorf("could not read ip from network address: %s: %s", s.addr, err.Error()) - } - return ip, nil +func (s *Service) GetIP() string { + return s.addr.IP.String() } func (s *Service) GetDefaultInterval() timesutil.Duration { @@ -225,9 +248,17 @@ func (s *Service) GetID() string { return s.id } +func (s *Service) GetFailureCount() int { + return s.failureCount +} + +func (s *Service) GetAverageRoundtrip() time.Duration { + return s.checker.Roundtrip() +} + func (s *Service) ConfigChanged(other *Service) bool { if s.Fqdn != other.Fqdn || - s.addr != other.addr || + s.addr.String() != other.addr.String() || s.Datacenter != other.Datacenter || s.FailureThreshold != other.FailureThreshold || s.priority != other.priority || @@ -240,9 +271,11 @@ func (s *Service) ConfigChanged(other *Service) bool { // updates the configuration values of s with the values of new func (s *Service) Assign(new *Service) { s.addr = new.addr + s.Fqdn = new.Fqdn s.checker = new.checker s.MemberOf = new.MemberOf s.priority = new.priority + s.checkType = new.checkType s.Datacenter = new.Datacenter s.defaultInterval = new.defaultInterval s.FailureThreshold = new.FailureThreshold @@ -252,18 +285,29 @@ func (s *Service) LogValue() slog.Value { if s == nil { return slog.StringValue("nil") } - ip, _ := s.GetIP() + return slog.GroupValue( slog.String("id", s.id), slog.String("memberOf", s.MemberOf), slog.String("fqdn", s.Fqdn), slog.String("datacenter", s.Datacenter), - slog.String("ip", ip), + slog.String("ip", s.GetIP()), ) } // satisfies the stringer interface to allow passing s for %v in formatted strings func (s *Service) String() string { - ip, _ := s.GetIP() - return 
fmt.Sprintf("id:%s, memberOf: %s, fqdn: %s, datacenter: %s, ip: %s", s.id, s.MemberOf, s.Fqdn, s.Datacenter, ip) + return fmt.Sprintf("%s:%s:%s:%s:%s", s.id, s.MemberOf, s.Fqdn, s.Datacenter, s.GetIP()) +} + +func (s *Service) GSLBService() *model.GSLBService { + return &model.GSLBService{ + ID: s.id, + MemberOf: s.MemberOf, + Fqdn: s.Fqdn, + Datacenter: s.Datacenter, + IP: s.GetIP(), + IsHealthy: s.isHealthy, + FailureCount: s.failureCount, + } } diff --git a/internal/service/service_test.go b/internal/service/service_test.go index 1c1a9eb..d0314d4 100644 --- a/internal/service/service_test.go +++ b/internal/service/service_test.go @@ -2,7 +2,6 @@ package service import ( "errors" - "log" "testing" "time" @@ -132,9 +131,7 @@ func TestOnSuccess(t *testing.T) { for range svc0.FailureThreshold - 1 { svc0.OnFailure(errors.New("test error")) } - log.Printf("count: %v", svc0.failureCount) svc0.OnSuccess() - log.Printf("count: %v", svc0.failureCount) if !svc0.isHealthy { t.Errorf("Expected health: %v, but got: %v. After 2x OnFailure before OnSuccess()", true, svc0.IsHealthy()) @@ -143,9 +140,7 @@ func TestOnSuccess(t *testing.T) { for range svc0.FailureThreshold { svc0.OnFailure(errors.New("test error")) } - log.Printf("count: %v", svc0.failureCount) svc0.OnSuccess() - log.Printf("count: %v", svc0.failureCount) if svc0.isHealthy { t.Fatalf("Expected health: %v, but got: %v. After 3x OnFailure before OnSuccess()", false, svc0.IsHealthy()) @@ -241,9 +236,7 @@ func TestOnFailure(t *testing.T) { for range svc0.FailureThreshold - 1 { svc0.OnSuccess() } - log.Printf("count: %v", svc0.failureCount) svc0.OnFailure(errors.New("test")) - log.Printf("count: %v", svc0.failureCount) if svc0.isHealthy { t.Errorf("Expected health: %v, but got: %v. 
After 2x OnSuccess() before OnFailure()", false, svc0.IsHealthy()) @@ -252,9 +245,7 @@ func TestOnFailure(t *testing.T) { for range svc0.FailureThreshold { svc0.OnSuccess() } - log.Printf("count: %v", svc0.failureCount) svc0.OnFailure(errors.New("test")) - log.Printf("count: %v", svc0.failureCount) if !svc0.isHealthy { t.Fatalf("Expected health: %v, but got: %v. After 3x OnSuccess() before OnFailure()", true, svc0.IsHealthy()) @@ -272,13 +263,14 @@ func TestService_GetBaseInterval(t *testing.T) { { name: "baseinterval-5-priority-1", config: model.GSLBConfig{ + ServiceID: "123", Fqdn: "test.nhn.no", Ip: "127.0.0.1", Port: "80", Datacenter: "Abels1", Interval: timesutil.FromDuration(time.Second * 5), Priority: 1, - CheckType: "TCP-FULL", + CheckType: "TCP-FULL", }, dryRun: true, want: timesutil.FromDuration(time.Second * 5), @@ -286,13 +278,14 @@ func TestService_GetBaseInterval(t *testing.T) { { name: "baseinterval-5-priority-2", config: model.GSLBConfig{ + ServiceID: "123", Fqdn: "test.nhn.no", Ip: "127.0.0.1", Port: "80", Datacenter: "Abels1", Interval: timesutil.FromDuration(time.Second * 5), Priority: 2, - CheckType: "TCP-FULL", + CheckType: "TCP-FULL", }, dryRun: true, want: timesutil.FromDuration(time.Second * 5), @@ -300,13 +293,14 @@ func TestService_GetBaseInterval(t *testing.T) { { name: "baseinterval-5-priority-3", config: model.GSLBConfig{ + ServiceID: "123", Fqdn: "test.nhn.no", Ip: "127.0.0.1", Port: "80", Datacenter: "Abels1", Interval: timesutil.FromDuration(time.Second * 5), Priority: 3, - CheckType: "TCP-FULL", + CheckType: "TCP-FULL", }, dryRun: true, want: timesutil.FromDuration(time.Second * 5), @@ -314,13 +308,14 @@ func TestService_GetBaseInterval(t *testing.T) { { name: "baseinterval-5-priority-4", config: model.GSLBConfig{ + ServiceID: "123", Fqdn: "test.nhn.no", Ip: "127.0.0.1", Port: "80", Datacenter: "Abels1", Interval: timesutil.FromDuration(time.Second * 5), Priority: 4, - CheckType: "TCP-FULL", + CheckType: "TCP-FULL", }, dryRun: true, 
want: timesutil.FromDuration(time.Second * 5), @@ -328,7 +323,7 @@ func TestService_GetBaseInterval(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - s, err := NewServiceFromGSLBConfig(tt.config, tt.dryRun) + s, err := NewServiceFromGSLBConfig(tt.config, WithDryRunChecks(tt.dryRun)) if err != nil { t.Fatalf("could not construct receiver type: %v", err) } diff --git a/pkg/auth/auth.go b/pkg/auth/auth.go index 2e71e6e..999c780 100644 --- a/pkg/auth/auth.go +++ b/pkg/auth/auth.go @@ -16,7 +16,7 @@ func WithTokenValidation(logger *slog.Logger) middleware.MiddlewareFunc { return func(w http.ResponseWriter, r *http.Request) { ctx := context.WithValue(r.Context(), "request_method", r.Method) ctx = context.WithValue(ctx, "request_route", r.URL.String()) - + resp, err := jwt.Validate(ctx, strings.Split(r.Header.Get("Authorization"), "Bearer")[1]) if err != nil { logger.Error("token-validation failed", slog.String("reason", err.Error())) diff --git a/pkg/dnsdist/client.go b/pkg/dnsdist/client.go index 080424d..19c4977 100644 --- a/pkg/dnsdist/client.go +++ b/pkg/dnsdist/client.go @@ -13,8 +13,6 @@ import ( "fmt" "io" "net" - "strconv" - "strings" "time" "golang.org/x/crypto/nacl/secretbox" @@ -25,8 +23,6 @@ const ( NONCE_LEN = 24 ) -type clientOption func(c *Client) error - type Client struct { conn net.Conn //raw connection to configured Host and Port key [KEY_LEN]byte @@ -72,57 +68,6 @@ func NewClient(key string, options ...clientOption) (*Client, error) { return client, nil } -func WithHost(host string) clientOption { - return func(c *Client) error { - ip := net.ParseIP(host) - if ip == nil { - return ErrCouldNotParseAddr - } - c.host = ip - return nil - } -} - -func WithPort(port string) clientOption { - return func(c *Client) error { - port = strings.TrimSpace(port) - if port == "" { - return ErrCouldNotParseAddr - } - // Ensure all characters are digits - for _, r := range port { - if r < '0' || r > '9' { - return ErrCouldNotParseAddr - 
} - } - - p, err := strconv.Atoi(port) - if err != nil || p < 1 || p > 65535 { - return ErrCouldNotParseAddr - } - - c.port = port - return nil - } -} - -func WithTimeout(timeout time.Duration) clientOption { - return func(c *Client) error { - c.timeout = timeout - return nil - } -} - -func WithNumRetriesOnCommandFailure(retries int) clientOption { - return func(c *Client) error { - if retries < 0 { - return ErrNegativeRetryCount - } - c.retries = retries - return nil - } -} - func (c *Client) generateClientNonce() error { bufferNonce := make([]byte, NONCE_LEN) _, err := rand.Read(bufferNonce) // initialize client nonce @@ -310,22 +255,20 @@ func incrementNonce(nonce *[NONCE_LEN]byte) { binary.BigEndian.PutUint32(nonce[:4], value) } -func (c *Client) AddDomainSpoof(domain string, ips []string) error { - // addAction(QNameRule('example.com'), SpoofAction({"192.168.1.0","192.168.1.2"}), {name="example.com"}) - cmd := fmt.Sprintf("addAction(QNameRule('%v'), SpoofAction({", domain) +func (c *Client) AddDomainSpoof(ruleName, domain, ip string) error { + // addAction(QNameRule('example.com'), SpoofAction({"192.168.1.0"}), {name="example.com:DC"}) + cmd := fmt.Sprintf("addAction(QNameRule('%v'), SpoofAction({'%s'}, {ttl=3600}), {name='%s'})", domain, ip, ruleName) + return Must(c.command(cmd)) +} - for _, ip := range ips { - cmd += fmt.Sprintf("'%v', ", ip) - } - idx := strings.LastIndex(cmd, ",") - if idx == -1 { - return fmt.Errorf("no trailing comma found in command: %s", cmd) - } - cmd = fmt.Sprintf("%v {name='%v'})", cmd[:idx]+"}),", domain) +func (c *Client) RmRuleWithName(ruleName string) error { + return Must(c.command(fmt.Sprintf("rmRule('%s')", ruleName))) +} - return Must(c.command(cmd)) +func (c *Client) RmRuleWithIndex(idx int) error { + return Must(c.command(fmt.Sprintf("rmRule('%d')", idx))) } -func (c *Client) RmDomainSpoof(domain string) error { - return Must(c.command(fmt.Sprintf("rmRule(%s)", domain))) +func (c *Client) ShowRules() (string, error) { + 
return c.command("showRules()") } diff --git a/pkg/dnsdist/client_test.go b/pkg/dnsdist/client_test.go index 95b2fa8..9d6aec6 100644 --- a/pkg/dnsdist/client_test.go +++ b/pkg/dnsdist/client_test.go @@ -1,7 +1,6 @@ package dnsdist import ( - "log" "testing" ) @@ -13,9 +12,11 @@ func TestNewClient(t *testing.T) { if err != nil { t.Errorf("could not create client: %v", err.Error()) } - + } +/* +TODO: need to mock dnsdist - server in testing func TestCommand(t *testing.T) { client, err := NewClient( "M2YQKiPEDzeWHUFjejVOd+QHmMVmm2SuYG7vSXdaIkE=", @@ -43,8 +44,9 @@ func TestAddDomainSpoof(t *testing.T) { t.Errorf("could not create client: %v", err.Error()) } - err = client.AddDomainSpoof("test.nhn.no", []string{"10.10.0.1", "10.10.0.2"}) + err = client.AddDomainSpoof("test.nhn.no:test", "test.nhn.no", "127.0.0.1") if err != nil { t.Errorf("failed to create DomainSpoof") } } +*/ diff --git a/pkg/dnsdist/mock_server.go b/pkg/dnsdist/mock_server.go new file mode 100644 index 0000000..14791d5 --- /dev/null +++ b/pkg/dnsdist/mock_server.go @@ -0,0 +1,205 @@ +package dnsdist + +import ( + "crypto/rand" + "encoding/binary" + "fmt" + "io" + "net" + "sync" + "testing" + + "golang.org/x/crypto/nacl/secretbox" +) + +// MockServer simulates a dnsdist console server for testing +type MockServer struct { + listener net.Listener + key [KEY_LEN]byte + addr string + handlers map[string]func(string) string // command -> response handler + mu sync.RWMutex + running bool + wg sync.WaitGroup +} + +// NewMockServer creates a new mock dnsdist server with the given key +func NewMockServer(t *testing.T, key [KEY_LEN]byte) *MockServer { + listener, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("failed to create mock server: %v", err) + } + + ms := &MockServer{ + listener: listener, + key: key, + addr: listener.Addr().String(), + handlers: make(map[string]func(string) string), + } + + // Set default handlers + ms.SetHandler("", func(cmd string) string { return "" }) // 
empty command for handshake + ms.SetHandler("showRules()", func(cmd string) string { return "Rules:\n" }) + + return ms +} + +// Start begins accepting connections +func (ms *MockServer) Start() { + ms.mu.Lock() + ms.running = true + ms.mu.Unlock() + + ms.wg.Add(1) + go ms.acceptLoop() +} + +// Stop stops the server and closes all connections +func (ms *MockServer) Stop() { + ms.mu.Lock() + ms.running = false + ms.mu.Unlock() + + ms.listener.Close() + ms.wg.Wait() +} + +// Addr returns the server's address +func (ms *MockServer) Addr() string { + return ms.addr +} + +// SetHandler sets a response handler for a specific command +func (ms *MockServer) SetHandler(cmd string, handler func(string) string) { + ms.mu.Lock() + defer ms.mu.Unlock() + ms.handlers[cmd] = handler +} + +func (ms *MockServer) acceptLoop() { + defer ms.wg.Done() + + for { + conn, err := ms.listener.Accept() + if err != nil { + ms.mu.RLock() + running := ms.running + ms.mu.RUnlock() + if !running { + return + } + continue + } + + ms.wg.Add(1) + go ms.handleConnection(conn) + } +} + +func (ms *MockServer) handleConnection(conn net.Conn) { + defer ms.wg.Done() + defer conn.Close() + + // Read client nonce + cNonce := make([]byte, NONCE_LEN) + _, err := io.ReadFull(conn, cNonce) + if err != nil { + return + } + + // Generate and send server nonce + sNonce := make([]byte, NONCE_LEN) + _, err = rand.Read(sNonce) + if err != nil { + return + } + + _, err = conn.Write(sNonce) + if err != nil { + return + } + + // Initialize read/write nonces + var rNonce, wNonce [NONCE_LEN]byte + halfNonce := NONCE_LEN / 2 + + // Server's read nonce (client's write nonce) + copy(rNonce[:halfNonce], sNonce[:halfNonce]) + copy(rNonce[halfNonce:], cNonce[halfNonce:]) + + // Server's write nonce (client's read nonce) + copy(wNonce[:halfNonce], cNonce[:halfNonce]) + copy(wNonce[halfNonce:], sNonce[halfNonce:]) + + // Handle commands + for { + cmd, err := ms.receiveCommand(conn, &rNonce) + if err != nil { + return + } + + 
response := ms.getResponse(cmd) + + err = ms.sendResponse(conn, response, &wNonce) + if err != nil { + return + } + } +} + +func (ms *MockServer) receiveCommand(conn net.Conn, rNonce *[NONCE_LEN]byte) (string, error) { + // Read length + bufferLen := make([]byte, 4) + _, err := io.ReadFull(conn, bufferLen) + if err != nil { + return "", err + } + + // Read encrypted command + cmdLen := binary.BigEndian.Uint32(bufferLen) + encryptedCmd := make([]byte, cmdLen) + _, err = io.ReadFull(conn, encryptedCmd) + if err != nil { + return "", err + } + + // Decrypt + decrypted, ok := secretbox.Open(nil, encryptedCmd, rNonce, &ms.key) + if !ok { + return "", fmt.Errorf("decryption failed") + } + + incrementNonce(rNonce) + + return string(decrypted), nil +} + +func (ms *MockServer) sendResponse(conn net.Conn, response string, wNonce *[NONCE_LEN]byte) error { + // Encrypt response + encrypted := secretbox.Seal(nil, []byte(response), wNonce, &ms.key) + incrementNonce(wNonce) + + // Send length + bufferLen := make([]byte, 4) + binary.BigEndian.PutUint32(bufferLen, uint32(len(encrypted))) + _, err := conn.Write(bufferLen) + if err != nil { + return err + } + + // Send encrypted response + _, err = conn.Write(encrypted) + return err +} + +func (ms *MockServer) getResponse(cmd string) string { + ms.mu.RLock() + defer ms.mu.RUnlock() + + if handler, ok := ms.handlers[cmd]; ok { + return handler(cmd) + } + + // Default: return empty response + return "" +} \ No newline at end of file diff --git a/pkg/dnsdist/options.go b/pkg/dnsdist/options.go new file mode 100644 index 0000000..6b36286 --- /dev/null +++ b/pkg/dnsdist/options.go @@ -0,0 +1,74 @@ +package dnsdist + +import ( + "fmt" + "net" + "strconv" + "strings" + "time" +) + +type clientOption func(c *Client) error + +func WithHost(host string) clientOption { + return func(c *Client) error { + ip := net.ParseIP(host) + if ip == nil { + return ErrCouldNotParseAddr + } + c.host = ip + return nil + } +} + +func WithHostName(hostname 
string) clientOption { + return func(c *Client) error { + ips, err := net.LookupHost(hostname) + if err != nil { + return fmt.Errorf("DNS - lookup failed: %w", err) + } + + c.host = net.IP(ips[0]) + return nil + } +} + +func WithPort(port string) clientOption { + return func(c *Client) error { + port = strings.TrimSpace(port) + if port == "" { + return ErrCouldNotParseAddr + } + // Ensure all characters are digits + for _, r := range port { + if r < '0' || r > '9' { + return ErrCouldNotParseAddr + } + } + + p, err := strconv.Atoi(port) + if err != nil || p < 1 || p > 65535 { + return ErrCouldNotParseAddr + } + + c.port = port + return nil + } +} + +func WithTimeout(timeout time.Duration) clientOption { + return func(c *Client) error { + c.timeout = timeout + return nil + } +} + +func WithNumRetriesOnCommandFailure(retries int) clientOption { + return func(c *Client) error { + if retries < 0 { + return ErrNegativeRetryCount + } + c.retries = retries + return nil + } +} diff --git a/pkg/dnsdist/rule.go b/pkg/dnsdist/rule.go new file mode 100644 index 0000000..34b37aa --- /dev/null +++ b/pkg/dnsdist/rule.go @@ -0,0 +1,9 @@ +package dnsdist + +type Rule struct { + ID string + Name string + Matches string + Rule string + Action string +} diff --git a/pkg/loaders/file_loader.go b/pkg/loaders/file_loader.go index 770cf27..ce632f0 100644 --- a/pkg/loaders/file_loader.go +++ b/pkg/loaders/file_loader.go @@ -3,7 +3,9 @@ package loaders import ( "encoding/json" "fmt" + "io/fs" "os" + "path/filepath" "reflect" "strings" @@ -14,10 +16,36 @@ type FileLoader struct { fileNames []string } -func NewFileLoader(fileNames ...string) *FileLoader { - return &FileLoader{ - fileNames: fileNames, +func NewFileLoader(fileNames ...string) (*FileLoader, error) { + loader := &FileLoader{ + fileNames: make([]string, 0, len(fileNames)), } + for _, file := range fileNames { + info, err := os.Stat(file) + if err == nil { // silently drop files that dont exist + if info.IsDir() { + err := 
filepath.Walk(file, func(path string, info fs.FileInfo, err error) error { + if err != nil { + return err + } + + if info.IsDir() { + return nil + } + + loader.fileNames = append(loader.fileNames, path) + return nil + }) + if err != nil { + return nil, fmt.Errorf("could not list files in directory: %w", err) + } + } else { + loader.fileNames = append(loader.fileNames, file) + } + } + } + + return loader, nil } func (f *FileLoader) Load(dest any) error { @@ -31,7 +59,7 @@ func (f *FileLoader) Load(dest any) error { err = f.loadJSON(dest, file) default: - err = f.loadDotEnv(dest, file) + err = f.loadPlainText(dest, file) } if err != nil { return fmt.Errorf("could not load file: %s: %w", file, err) @@ -89,3 +117,48 @@ func (f *FileLoader) loadJSON(dest any, file string) error { return nil } + +func (f *FileLoader) loadPlainText(dest any, file string) error { + info, err := os.Stat(file) + if err != nil { + return fmt.Errorf("unable to load file: %s: %w", file, err) + } + + if info.IsDir() { // skip directories + return nil + } + + val := reflect.ValueOf(dest).Elem() + typ := val.Type() + + if typ.Kind() != reflect.Struct { + return fmt.Errorf("unable to load config file: %s: destination must be a struct pointer", file) + } + rawData, err := os.ReadFile(file) + if err != nil { + return fmt.Errorf("could not read file: %s: %w", file, err) + } + data := string(rawData) + + for i := range val.NumField() { + field := val.Field(i) + fieldTyp := typ.Field(i) + + if !field.CanSet() { + continue + } + + tag, ok := fieldTyp.Tag.Lookup("env") + if !ok { + continue + } + + if strings.Contains(file, tag) { // file name must contain the struct tag + if err := setEnvironmentVariable(field, data); err != nil { + return fmt.Errorf("unable to set struct value: %w", err) + } + } + } + + return nil +} diff --git a/pkg/loaders/flag_loader.go b/pkg/loaders/flag_loader.go index fb1f459..cd2e0d2 100644 --- a/pkg/loaders/flag_loader.go +++ b/pkg/loaders/flag_loader.go @@ -1,7 +1,6 @@ package 
loaders - -type FlagLoader struct {} +type FlagLoader struct{} func NewFlagLoader() *FileLoader { return &FileLoader{} @@ -10,4 +9,4 @@ func NewFlagLoader() *FileLoader { func (f *FlagLoader) Load(dest any) error { return nil -} \ No newline at end of file +} diff --git a/pkg/lua/bucket.go b/pkg/lua/bucket.go index 0326e66..03ca911 100644 --- a/pkg/lua/bucket.go +++ b/pkg/lua/bucket.go @@ -28,7 +28,7 @@ func (pl *LuaBucket) get() *glua.LState { func (pl *LuaBucket) new() *glua.LState { L := glua.NewState(glua.Options{ - SkipOpenLibs: true, + SkipOpenLibs: true, IncludeGoStackTrace: true, MinimizeStackMemory: true, }) diff --git a/pkg/models/spoofs/hash.go b/pkg/models/spoofs/hash.go index 2840ce3..d1ae610 100644 --- a/pkg/models/spoofs/hash.go +++ b/pkg/models/spoofs/hash.go @@ -2,4 +2,4 @@ package spoofs type Hash struct { Hash string `json:"hash"` -} \ No newline at end of file +} diff --git a/pkg/models/spoofs/override.go b/pkg/models/spoofs/override.go index 1e5f6ea..6b5af86 100644 --- a/pkg/models/spoofs/override.go +++ b/pkg/models/spoofs/override.go @@ -3,6 +3,6 @@ package spoofs import "net" type Override struct { - FQDN string `json:"fqdn"` - IP net.IP `json:"ip,omitempty"` + MemberOf string `json:"memberOf"` + IP net.IP `json:"ip,omitempty"` } diff --git a/pkg/persistence/persistence.go b/pkg/persistence/persistence.go index 7e9f2e0..4b0358f 100644 --- a/pkg/persistence/persistence.go +++ b/pkg/persistence/persistence.go @@ -14,4 +14,5 @@ type Store[T any] interface { Load(key string) (T, error) LoadAll() ([]T, error) Delete(key string) error + Close() error } diff --git a/pkg/persistence/store/file/file.go b/pkg/persistence/store/file/file.go index e22c944..d6706fd 100644 --- a/pkg/persistence/store/file/file.go +++ b/pkg/persistence/store/file/file.go @@ -8,7 +8,8 @@ import ( ) type Store[T any] struct { - lock sync.RWMutex + lock sync.RWMutex + cache map[string]T fileName string } @@ -20,8 +21,9 @@ func NewStore[T any](fileName string) (*Store[T], 
error) { store.Close() return &Store[T]{ - lock: sync.RWMutex{}, + lock: sync.RWMutex{}, fileName: fileName, + cache: make(map[string]T), }, nil } @@ -29,6 +31,8 @@ func (s *Store[T]) Save(key string, data T) error { s.lock.Lock() defer s.lock.Unlock() + s.cache[key] = data + saved, err := os.ReadFile(s.fileName) if err != nil { return fmt.Errorf("could not read file: %s", err.Error()) @@ -58,22 +62,30 @@ func (s *Store[T]) Save(key string, data T) error { } func (s *Store[T]) Load(key string) (T, error) { - var zero T s.lock.Lock() defer s.lock.Unlock() + var zero T - saved, err := os.ReadFile(s.fileName) + data, ok := s.cache[key] + if ok { + return data, nil + } + + file, err := os.ReadFile(s.fileName) if err != nil { - return zero, fmt.Errorf("unable to read from storage: %s", err.Error()) + return zero, fmt.Errorf("unable to read storage: %w", err) } - store := make(map[string]T) - err = json.Unmarshal(saved, &store) + if len(file) == 0 { + return zero, nil + } + + err = json.Unmarshal(file, &s.cache) if err != nil { - return zero, fmt.Errorf("unable to read: %s: %s", key, err.Error()) + return zero, fmt.Errorf("unable to parse: %s: %s", key, err.Error()) } - return store[key], nil + return s.cache[key], nil } func (s *Store[T]) LoadAll() ([]T, error) { @@ -86,13 +98,18 @@ func (s *Store[T]) LoadAll() ([]T, error) { return nil, fmt.Errorf("unable to read from storage: %s", err.Error()) } + if len(saved) == 0 { + return all, nil + } + store := make(map[string]T) err = json.Unmarshal(saved, &store) if err != nil { return nil, fmt.Errorf("unable to parse JSON: %s", err.Error()) } - for _, val := range store { + for key, val := range store { + s.cache[key] = val all = append(all, val) } @@ -103,6 +120,8 @@ func (s *Store[T]) Delete(key string) error { s.lock.Lock() defer s.lock.Unlock() + delete(s.cache, key) + saved, err := os.ReadFile(s.fileName) if err != nil { return fmt.Errorf("could not read file: %s", err.Error()) @@ -130,3 +149,7 @@ func (s *Store[T]) 
Delete(key string) error { return nil } + +func (s *Store[T]) Close() error { + return nil +} diff --git a/pkg/persistence/store/memory/memory.go b/pkg/persistence/store/memory/memory.go index 318644b..2733295 100644 --- a/pkg/persistence/store/memory/memory.go +++ b/pkg/persistence/store/memory/memory.go @@ -1,7 +1,6 @@ package memory import ( - "fmt" "sync" ) @@ -30,7 +29,7 @@ func (s *Store[T]) Load(key string) (T, error) { val, exist := s.data[key] if !exist { var zero T - return zero, fmt.Errorf("resource: %s, does not exist", key) + return zero, nil } return val, nil } @@ -43,7 +42,7 @@ func (s *Store[T]) LoadAll() ([]T, error) { for _, val := range s.data { result = append(result, val) } - + return result, nil } @@ -53,3 +52,7 @@ func (s *Store[T]) Delete(key string) error { delete(s.data, key) return nil } + +func (s *Store[T]) Close() error { + return nil +} diff --git a/pkg/pool/bufferedQueue.go b/pkg/pool/bufferedQueue.go index 5e7d267..62df50f 100644 --- a/pkg/pool/bufferedQueue.go +++ b/pkg/pool/bufferedQueue.go @@ -1,9 +1,8 @@ package pool - type BufferedJobQueue chan Job // returns true wether a new item will block or not func (bq *BufferedJobQueue) Blocked() bool { return len(*bq) == cap(*bq) -} \ No newline at end of file +} diff --git a/pkg/pool/pool.go b/pkg/pool/pool.go index b158672..d7dd629 100644 --- a/pkg/pool/pool.go +++ b/pkg/pool/pool.go @@ -24,6 +24,10 @@ type WorkerPool struct { poolWg *sync.WaitGroup lock sync.Mutex closed *atomic.Bool + + // configurable action to take on worker-pool scale + OnScaleUp func() + OnScaleDown func() } func NewWorkerPool(minRunningWorkers, nonBlockingBufferSize uint) *WorkerPool { @@ -77,8 +81,12 @@ func (wp *WorkerPool) Put(job Job) error { } func (wp *WorkerPool) scale() { + wp.lock.Lock() if wp.jobs.Blocked() { + wp.lock.Unlock() wp.newWorker() + } else { + wp.lock.Unlock() } } @@ -102,13 +110,17 @@ func (wp *WorkerPool) ScaleTo(targetWorkers uint) { func (wp *WorkerPool) newWorker() { wp.lock.Lock() - 
defer wp.lock.Unlock() wp.numRunningWorkers++ id := uuid.New().ID() wp.poolWg.Add(1) go wp.worker(id) + wp.lock.Unlock() + + if wp.OnScaleUp != nil { + wp.OnScaleUp() + } } func (wp *WorkerPool) worker(id uint32) { @@ -124,7 +136,7 @@ func (wp *WorkerPool) worker(id uint32) { err := job.Execute() if err != nil { job.OnFailure(err) - }else { + } else { job.OnSuccess() } @@ -140,6 +152,11 @@ func (wp *WorkerPool) worker(id uint32) { if wp.numRunningWorkers > wp.minRunningWorkers { wp.numRunningWorkers-- wp.lock.Unlock() + + if wp.OnScaleDown != nil { + wp.OnScaleDown() + } + return } wp.lock.Unlock() diff --git a/pkg/rest/const.go b/pkg/rest/const.go index b2514ec..d8265f2 100644 --- a/pkg/rest/const.go +++ b/pkg/rest/const.go @@ -2,4 +2,4 @@ package rest const ( ContentTypeJSON = "application/json" -) \ No newline at end of file +) diff --git a/pkg/rest/request/client/client.go b/pkg/rest/request/client/client.go index 2eca7c4..e34f8ce 100644 --- a/pkg/rest/request/client/client.go +++ b/pkg/rest/request/client/client.go @@ -41,7 +41,6 @@ func NewClient(timeout time.Duration, opts ...clientOption) (*HTTPClient, error) return &ctx.wrapped, nil } - func (c *Client) Do(req *http.Request) (*http.Response, error) { return c.Client.Do(req) } diff --git a/pkg/rest/request/request.go b/pkg/rest/request/request.go index 77c9b53..b909821 100644 --- a/pkg/rest/request/request.go +++ b/pkg/rest/request/request.go @@ -5,7 +5,6 @@ import ( "io" ) - func JSONDECODE[T any](body io.Reader, dest *T) error { return json.NewDecoder(body).Decode(dest) -} \ No newline at end of file +} diff --git a/pkg/rest/response/error.go b/pkg/rest/response/error.go index c6a024d..2f7f98e 100644 --- a/pkg/rest/response/error.go +++ b/pkg/rest/response/error.go @@ -29,7 +29,7 @@ var ( Title: string(ErrInternalError), }, ErrNotFound: { - Code: http.StatusNotFound, + Code: http.StatusNotFound, Title: string(ErrNotFound), }, }