diff --git a/.gitignore b/.gitignore index a24e88084..4dda7a12b 100644 --- a/.gitignore +++ b/.gitignore @@ -60,10 +60,13 @@ test_result # Generated configs build/generated/ build/workspace/ +build/workspace-yugabyte/ build/cockroachdb.yaml build/values.yaml build/dss.yaml +deploy/operations/certificates-management/workspace/ + temp # Django stuff: @@ -131,4 +134,4 @@ go .vscode # terraform -.terraform* \ No newline at end of file +.terraform* diff --git a/build/README.md b/build/README.md index 4e8facccf..683b8365a 100644 --- a/build/README.md +++ b/build/README.md @@ -209,7 +209,7 @@ a PR to that effect would be greatly appreciated. to create DNS entries for the static IP addresses created above. To list the IP addresses, use `gcloud compute addresses list`. -1. Use [`make-certs.py` script](./make-certs.py) to create certificates for +1. (Only if you use CockroachDB) Use [`make-certs.py` script](./make-certs.py) to create certificates for the CockroachDB nodes in this DSS instance: ./make-certs.py --cluster $CLUSTER_CONTEXT --namespace $NAMESPACE @@ -243,6 +243,8 @@ a PR to that effect would be greatly appreciated. the rest of the instances, such that ca.crt is the same across all instances. +1. (Only if you use Yugabyte) Use [`dss-certs.py` script](../deploy/operations/certificates-management/README.md) to create certificates for the Yugabyte nodes in this DSS instance. + 1. If joining an existing DSS pool, share ca.crt with the DSS instance(s) you are trying to join, and have them apply the new ca.crt, which now contains both your instance's and the original instance's public certs, to enable @@ -251,14 +253,28 @@ a PR to that effect would be greatly appreciated. actions below. While they are performing those actions, you may continue with the instructions. - 1. Overwrite its existing ca.crt with the new ca.crt provided by the DSS - instance joining the pool. - 1. Upload the new ca.crt to its cluster using - `./apply-certs.sh $CLUSTER_CONTEXT $NAMESPACE` - 1. 
Restart their CockroachDB pods to recognize the updated ca.crt: - `kubectl rollout restart statefulset/cockroachdb --namespace $NAMESPACE` - 1. Inform you when their CockroachDB pods have finished restarting - (typically around 10 minutes) + 1. If you use CockroachDB: + + 1. Overwrite its existing ca.crt with the new ca.crt provided by the DSS + instance joining the pool. + 1. Upload the new ca.crt to its cluster using + `./apply-certs.sh $CLUSTER_CONTEXT $NAMESPACE` + 1. Restart their CockroachDB pods to recognize the updated ca.crt: + `kubectl rollout restart statefulset/cockroachdb --namespace $NAMESPACE` + 1. Inform you when their CockroachDB pods have finished restarting + (typically around 10 minutes) + + 1. If you use Yugabyte: + + 1. Share your CA with `./dss-certs.py get-ca` + 1. Add other CAs of the pool with `./dss-certs.py add-pool-ca` + 1. Upload the new CAs to its cluster using + `./dss-certs.py apply` + 1. Restart their Yugabyte pods to recognize the updated ca.crt: + `kubectl rollout restart statefulset/yb-master --namespace $NAMESPACE` + `kubectl rollout restart statefulset/yb-tserver --namespace $NAMESPACE` + 1. Inform you when their Yugabyte pods have finished restarting + (typically around 10 minutes) 1. Ensure the Docker images are built according to the instructions in the [previous section](#docker-images). @@ -295,10 +311,10 @@ a PR to that effect would be greatly appreciated. DSS v0.16, the recommended CockroachDB image name is `cockroachdb/cockroach:v21.2.7`. From DSS v0.17, the recommended CockroachDB version is `cockroachdb/cockroach:v24.1.3`. - 1. `VAR_CRDB_HOSTNAME_SUFFIX`: The domain name suffix shared by all of your + 1. `VAR_DB_HOSTNAME_SUFFIX`: The domain name suffix shared by all of your CockroachDB nodes. For instance, if your CRDB nodes were addressable at `0.db.example.com`, `1.db.example.com`, and `2.db.example.com`, then - VAR_CRDB_HOSTNAME_SUFFIX would be `db.example.com`. + VAR_DB_HOSTNAME_SUFFIX would be `db.example.com`. 
1. `VAR_CRDB_LOCALITY`: Unique name for your DSS instance. Currently, we recommend "_", and the `=` character is not diff --git a/build/make-certs.py b/build/make-certs.py index 2ea8f1b0e..1f54b8310 100755 --- a/build/make-certs.py +++ b/build/make-certs.py @@ -1,8 +1,6 @@ #!/usr/bin/env python3 import argparse -import itertools -import glob import os import shutil import subprocess diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/README.md b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/README.md index 7c9370807..f681f9f6b 100644 --- a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/README.md +++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/README.md @@ -128,4 +128,3 @@ Delete the resources: `kubectl delete -f test-app.yml`. 1. Delete all created resources from the cluster (eg. clean up test as described in the previous section.) 2. Make sure all load balancers and target groups have been removed. 3. Run `terraform destroy`. - diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_dns.tf b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_dns.tf index 355cb8f99..77e439a3a 100644 --- a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_dns.tf +++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_dns.tf @@ -1,6 +1,8 @@ locals { crdb_hostnames = var.aws_route53_zone_id == "" ? {} : { for i in aws_eip.ip_crdb[*] : i.tags.ExpectedDNS => i.public_ip } + yugabyte_master_hostnames = var.aws_route53_zone_id == "" ? {} : { for i in aws_eip.ip_yugabyte_masters[*] : i.tags.ExpectedDNS => i.public_ip } + yugabyte_tserver_hostnames = var.aws_route53_zone_id == "" ? 
{} : { for i in aws_eip.ip_yugabyte_tservers[*] : i.tags.ExpectedDNS => i.public_ip } } @@ -37,3 +39,25 @@ resource "aws_route53_record" "crdb_hostname" { ttl = 300 records = [each.value] } + +# Yugabyte master nodes DNS +resource "aws_route53_record" "yugabyte_master_hostnames" { + for_each = local.yugabyte_master_hostnames + + zone_id = var.aws_route53_zone_id + name = each.key + type = "A" + ttl = 300 + records = [each.value] +} + +# Yugabyte tserver nodes DNS +resource "aws_route53_record" "yugabyte_tserver_hostnames" { + for_each = local.yugabyte_tserver_hostnames + + zone_id = var.aws_route53_zone_id + name = each.key + type = "A" + ttl = 300 + records = [each.value] +} diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_lb.tf b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_lb.tf index b436a49c7..1f9bdb651 100644 --- a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_lb.tf +++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/network_lb.tf @@ -73,12 +73,36 @@ resource "aws_eip" "gateway" { # Public Elastic IPs for the crdb instances resource "aws_eip" "ip_crdb" { - count = var.node_count + count = var.datastore_type == "cockroachdb" ? var.node_count : 0 vpc = true tags = { Name = format("%s-ip-crdb%v", var.cluster_name, count.index) # Preserve mapping between ips and hostnames - ExpectedDNS = format("%s.%s", count.index, var.crdb_hostname_suffix) + ExpectedDNS = format("%s.%s", count.index, var.db_hostname_suffix) + } +} + +# Public Elastic IPs for the yugabyte master instances +resource "aws_eip" "ip_yugabyte_masters" { + count = var.datastore_type == "yugabyte" ? 
var.node_count : 0 + vpc = true + + tags = { + Name = format("%s-ip-yugabyte-master%v", var.cluster_name, count.index) + # Preserve mapping between ips and hostnames + ExpectedDNS = format("%s.master.%s", count.index, var.db_hostname_suffix) + } +} + +# Public Elastic IPs for the yugabyte tserver instances +resource "aws_eip" "ip_yugabyte_tservers" { + count = var.datastore_type == "yugabyte" ? var.node_count : 0 + vpc = true + + tags = { + Name = format("%s-ip-yugabyte-tserver%v", var.cluster_name, count.index) + # Preserve mapping between ips and hostnames + ExpectedDNS = format("%s.tserver.%s", count.index, var.db_hostname_suffix) } } diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/output.tf b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/output.tf index a6d238d77..e7bf3dc25 100644 --- a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/output.tf +++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/output.tf @@ -30,10 +30,42 @@ output "crdb_nodes" { ] } +output "yugabyte_masters_nodes" { + value = [ + for i in aws_eip.ip_yugabyte_masters : { + ip = i.allocation_id + dns = i.tags.ExpectedDNS + } + ] + depends_on = [ + aws_eip.ip_yugabyte_masters + ] +} + +output "yugabyte_tservers_nodes" { + value = [ + for i in aws_eip.ip_yugabyte_tservers : { + ip = i.allocation_id + dns = i.tags.ExpectedDNS + } + ] + depends_on = [ + aws_eip.ip_yugabyte_tservers + ] +} + output "crdb_addresses" { value = [for i in aws_eip.ip_crdb[*] : { expected_dns : i.tags.ExpectedDNS, address : i.public_ip }] } +output "yugabyte_masters_addresses" { + value = [for i in aws_eip.ip_yugabyte_masters[*] : { expected_dns : i.tags.ExpectedDNS, address : i.public_ip }] +} + +output "yugabyte_tservers_addresses" { + value = [for i in aws_eip.ip_yugabyte_tservers[*] : { expected_dns : i.tags.ExpectedDNS, address : i.public_ip }] +} + output "gateway_address" { value = { expected_dns : aws_eip.gateway[0].tags.ExpectedDNS, @@ -56,4 +88,4 
@@ output "workload_subnet" { output "iam_role_node_group_arn" { value = aws_iam_role.dss-cluster-node-group.arn -} \ No newline at end of file +} diff --git a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/variables.gen.tf b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/variables.gen.tf index 3ec55966b..192149f83 100644 --- a/deploy/infrastructure/dependencies/terraform-aws-kubernetes/variables.gen.tf +++ b/deploy/infrastructure/dependencies/terraform-aws-kubernetes/variables.gen.tf @@ -54,17 +54,35 @@ variable "app_hostname" { EOT } -variable "crdb_hostname_suffix" { +variable "db_hostname_suffix" { type = string description = <<-EOT - The domain name suffix shared by all of your CockroachDB nodes. - For instance, if your CRDB nodes were addressable at 0.db.example.com, - 1.db.example.com and 2.db.example.com, then the value would be db.example.com. + The domain name suffix shared by all of your databases nodes. + For instance, if your database nodes were addressable at 0.db.example.com, + 1.db.example.com and 2.db.example.com (CockroachDB) or 0.master.db.example.com, 1.tserver.db.example.com (Yugabyte), then the value would be db.example.com. 
Example: db.example.com EOT } + +variable "datastore_type" { + type = string + description = <<-EOT + Type of datastore used + + Supported technologies: cockroachdb, yugabyte + EOT + + validation { + condition = contains(["cockroachdb", "yugabyte"], var.datastore_type) + error_message = "Supported technologies: cockroachdb, yugabyte" + } + + default = "cockroachdb" +} + + variable "cluster_name" { type = string description = <<-EOT diff --git a/deploy/infrastructure/dependencies/terraform-commons-dss/helm.tf b/deploy/infrastructure/dependencies/terraform-commons-dss/helm.tf index fc5b73ec0..74fb834ba 100644 --- a/deploy/infrastructure/dependencies/terraform-commons-dss/helm.tf +++ b/deploy/infrastructure/dependencies/terraform-commons-dss/helm.tf @@ -6,7 +6,7 @@ locals { resource "local_file" "helm_chart_values" { filename = "${local.workspace_location}/helm_values.yml" - content = yamlencode({ + content = var.datastore_type == "cockroachdb" ? yamlencode({ cockroachdb = { image = { tag = var.crdb_image_tag @@ -24,7 +24,7 @@ resource "local_file" "helm_chart_values" { replicas = length(var.crdb_internal_nodes) args = [ "--locality-advertise-addr=zone=${var.crdb_locality}@$(hostname -f)", - "--advertise-addr=$${HOSTNAME##*-}.${var.crdb_hostname_suffix}" + "--advertise-addr=$${HOSTNAME##*-}.${var.db_hostname_suffix}" ] } @@ -69,5 +69,146 @@ resource "local_file" "helm_chart_values" { global = { cloudProvider = var.kubernetes_cloud_provider_name } - }) +}) : yamlencode({ + cockroachdb = { + enabled = false + image = { + tag = "dummy" + } + fullnameOverride = "dummy" + conf = { + cluster-name = "dummy" + locality = "dummy" + } + statefulset = {} + } + yugabyte = { + enabled = true + Image = { + tag = "2.25.2.0-b359" + } + nameOverride = "dss-yugabyte" + + resource = var.yugabyte_light_resources ? 
{ + master = { + requests = { + cpu = "0.1" + memory = "0.5G" + } + } + tserver = { + requests = { + cpu = "0.1" + memory = "0.5G" + } + } + } : {} + enableLoadBalancer = false + + storage = { + master = { + storageClass = var.kubernetes_storage_class + } + tserver = { + storageClass = var.kubernetes_storage_class + } + } + + preflight = { + skipUlimit = true + } + + master = { + extraEnv = [{ + name = "HOSTNAMENO" + valueFrom = { + fieldRef = { + fieldPath = "metadata.labels['apps.kubernetes.io/pod-index']" + } + } + }] + serverBroadcastAddress: "$${HOSTNAMENO}.master.${var.db_hostname_suffix}" + rpcBindAddress: "$${HOSTNAMENO}.master.${var.db_hostname_suffix}" + preCommands: "sed -E \"/\\.svc\\.cluster\\.local/ s/^([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)([[:space:]]+)/\\1 $(echo \"$${HOSTNAMENO}.master.${var.db_hostname_suffix}\" | sed 's/[\\/&]/\\\\&/g')\\2/\" /etc/hosts > /tmp/newhosts && /bin/cp /tmp/newhosts /etc/hosts && \\" + } + + tserver = { + extraEnv = [{ + name = "HOSTNAMENO" + valueFrom = { + fieldRef = { + fieldPath = "metadata.labels['apps.kubernetes.io/pod-index']" + } + } + }] + serverBroadcastAddress: "$${HOSTNAMENO}.tserver.${var.db_hostname_suffix}" + rpcBindAddress: "$${HOSTNAMENO}.tserver.${var.db_hostname_suffix}" + preCommands: "sed -E \"/\\.svc\\.cluster\\.local/ s/^([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+)([[:space:]]+)/\\1 $(echo \"$${HOSTNAMENO}.tserver.${var.db_hostname_suffix}\" | sed 's/[\\/&]/\\\\&/g')\\2/\" /etc/hosts > /tmp/newhosts && /bin/cp /tmp/newhosts /etc/hosts && \\" + } + + gflags = { + master = { + placement_cloud: var.yugabyte_cloud + placement_region: var.yugabyte_region + placement_zone: var.yugabyte_zone + use_private_ip: "zone" + } + tserver = { + placement_cloud: var.yugabyte_cloud + placement_region: var.yugabyte_region + placement_zone: var.yugabyte_zone + use_private_ip: "zone" + } + } + + isMultiAz = true + masterAddresses = join(",", 
["0.master.${var.db_hostname_suffix},1.master.${var.db_hostname_suffix},2.master.${var.db_hostname_suffix}", join(",", var.yugabyte_external_nodes)]) + } + + loadBalancers = { + cockroachdbNodes = [] + + yugabyteMasterNodes = [ + for ip in var.yugabyte_internal_masters_nodes[*].ip : + { + ip = ip + subnet = var.workload_subnet + } + ] + + yugabyteTserverNodes = [ + for ip in var.yugabyte_internal_tservers_nodes[*].ip : + { + ip = ip + subnet = var.workload_subnet + } + ] + + dssGateway = { + ip = var.ip_gateway + subnet = var.workload_subnet + certName = var.gateway_cert_name + sslPolicy = var.ssl_policy + } + } + + dss = { + image = var.image + + conf = { + pubKeys = [ + "/test-certs/auth2.pem" + ] + jwksEndpoint = var.authorization.jwks != null ? var.authorization.jwks.endpoint : "" + jwksKeyIds = var.authorization.jwks != null ? [var.authorization.jwks.key_id] : [] + hostname = var.app_hostname + enableScd = var.enable_scd + } + } + + global = { + cloudProvider = var.kubernetes_cloud_provider_name + } +}) + } diff --git a/deploy/infrastructure/dependencies/terraform-commons-dss/scripts.tf b/deploy/infrastructure/dependencies/terraform-commons-dss/scripts.tf index 01ee71f17..240b35b29 100644 --- a/deploy/infrastructure/dependencies/terraform-commons-dss/scripts.tf +++ b/deploy/infrastructure/dependencies/terraform-commons-dss/scripts.tf @@ -1,5 +1,6 @@ resource "local_file" "make_certs" { + count = var.datastore_type == "cockroachdb" ? 1 : 0 content = templatefile("${path.module}/templates/make-certs.sh.tmp", { cluster_context = var.kubernetes_context_name namespace = var.kubernetes_namespace @@ -10,6 +11,7 @@ resource "local_file" "make_certs" { } resource "local_file" "apply_certs" { + count = var.datastore_type == "cockroachdb" ? 
1 : 0 content = templatefile("${path.module}/templates/apply-certs.sh.tmp", { cluster_context = var.kubernetes_context_name namespace = var.kubernetes_namespace @@ -17,6 +19,16 @@ resource "local_file" "apply_certs" { filename = "${local.workspace_location}/apply-certs.sh" } +resource "local_file" "dss_certs" { + count = var.datastore_type == "yugabyte" ? 1 : 0 + content = templatefile("${path.module}/templates/dss-certs.sh.tmp", { + cluster_context = var.kubernetes_context_name + namespace = var.kubernetes_namespace + db_hostname_suffix = var.db_hostname_suffix + }) + filename = "${local.workspace_location}/dss-certs.sh" +} + resource "local_file" "get_credentials" { content = templatefile("${path.module}/templates/get-credentials.sh.tmp", { get_credentials_cmd = var.kubernetes_get_credentials_cmd diff --git a/deploy/infrastructure/dependencies/terraform-commons-dss/tanka.tf b/deploy/infrastructure/dependencies/terraform-commons-dss/tanka.tf index b02801730..f065f7489 100644 --- a/deploy/infrastructure/dependencies/terraform-commons-dss/tanka.tf +++ b/deploy/infrastructure/dependencies/terraform-commons-dss/tanka.tf @@ -4,7 +4,7 @@ resource "local_file" "tanka_config_main" { VAR_NAMESPACE = var.kubernetes_namespace VAR_CLUSTER_CONTEXT = var.kubernetes_context_name VAR_ENABLE_SCD = var.enable_scd - VAR_CRDB_HOSTNAME_SUFFIX = var.crdb_hostname_suffix + VAR_DB_HOSTNAME_SUFFIX = var.db_hostname_suffix VAR_CRDB_LOCALITY = var.crdb_locality VAR_CRDB_NODE_IPS = join(",", [for i in var.crdb_internal_nodes[*].ip : "'${i}'"]) VAR_INGRESS_NAME = var.ip_gateway diff --git a/deploy/infrastructure/dependencies/terraform-commons-dss/templates/dss-certs.sh.tmp b/deploy/infrastructure/dependencies/terraform-commons-dss/templates/dss-certs.sh.tmp new file mode 100644 index 000000000..9f3c26e42 --- /dev/null +++ b/deploy/infrastructure/dependencies/terraform-commons-dss/templates/dss-certs.sh.tmp @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +# This file was automatically generated by 
terraform-commons-dss. +# Do not edit it directly. + +set -eo pipefail + +OS=$(uname) +if [[ "$OS" == "Darwin" ]]; then + # OSX uses BSD readlink + BASEDIR="$(dirname "$0")" +else + BASEDIR=$(readlink -e "$(dirname "$0")") +fi +cd "$BASEDIR/../../../deploy/operations/certificates-management/" || exit 1 + +./dss-certs.py --name ${cluster_context} --organization default_orga --cluster-context ${cluster_context} --nodes-public-address "..${db_hostname_suffix}" --namespace ${namespace} "$@" diff --git a/deploy/infrastructure/dependencies/terraform-commons-dss/templates/main.jsonnet.tmp b/deploy/infrastructure/dependencies/terraform-commons-dss/templates/main.jsonnet.tmp index e9f6de78b..9bd9b3ca8 100644 --- a/deploy/infrastructure/dependencies/terraform-commons-dss/templates/main.jsonnet.tmp +++ b/deploy/infrastructure/dependencies/terraform-commons-dss/templates/main.jsonnet.tmp @@ -13,7 +13,7 @@ local metadata = metadataBase { enableScd: ${VAR_ENABLE_SCD}, // <-- This boolean value is VAR_ENABLE_SCD cockroach+: { image: '${VAR_CRDB_DOCKER_IMAGE_NAME}', - hostnameSuffix: '${VAR_CRDB_HOSTNAME_SUFFIX}', + hostnameSuffix: '${VAR_DB_HOSTNAME_SUFFIX}', locality: '${VAR_CRDB_LOCALITY}', nodeIPs: [${VAR_CRDB_NODE_IPS}], shouldInit: ${VAR_SHOULD_INIT}, diff --git a/deploy/infrastructure/dependencies/terraform-commons-dss/variables.gen.tf b/deploy/infrastructure/dependencies/terraform-commons-dss/variables.gen.tf index 32be949d2..83ba69568 100644 --- a/deploy/infrastructure/dependencies/terraform-commons-dss/variables.gen.tf +++ b/deploy/infrastructure/dependencies/terraform-commons-dss/variables.gen.tf @@ -11,17 +11,35 @@ variable "app_hostname" { EOT } -variable "crdb_hostname_suffix" { +variable "db_hostname_suffix" { type = string description = <<-EOT - The domain name suffix shared by all of your CockroachDB nodes. - For instance, if your CRDB nodes were addressable at 0.db.example.com, - 1.db.example.com and 2.db.example.com, then the value would be db.example.com. 
+ The domain name suffix shared by all of your database nodes. + For instance, if your database nodes were addressable at 0.db.example.com, + 1.db.example.com and 2.db.example.com (CockroachDB) or 0.master.db.example.com, 1.tserver.db.example.com (Yugabyte), then the value would be db.example.com. Example: db.example.com EOT } + +variable "datastore_type" { + type = string + description = <<-EOT + Type of datastore used + + Supported technologies: cockroachdb, yugabyte + EOT + + validation { + condition = contains(["cockroachdb", "yugabyte"], var.datastore_type) + error_message = "Supported technologies: cockroachdb, yugabyte" + } + + default = "cockroachdb" +} + + variable "image" { type = string description = <<-EOT @@ -132,10 +150,13 @@ variable "should_init" { for a pool. When set true, this can initialize the data directories on your cluster, and prevent you from joining an existing pool. + Only used for CockroachDB with Tanka + Example: `true` EOT } + variable "desired_rid_db_version" { type = string description = <<-EOT @@ -225,3 +246,62 @@ variable "kubernetes_namespace" { } } +variable "yugabyte_cloud" { + type = string + description = <<-EOT + Cloud of yugabyte instances, used for partitioning. + + Should be set to dss unless you're doing advanced partitioning. + EOT + + default = "dss" +} + + +variable "yugabyte_region" { + type = string + description = <<-EOT + Region of yugabyte instances, used for partitioning. + + Should be different from other USSs in a cluster. + EOT + + default = "uss-1" +} + + +variable "yugabyte_zone" { + type = string + description = <<-EOT + Zone of yugabyte instances, used for partitioning. + + Should be set to zone unless you're doing advanced partitioning. + EOT + + default = "zone" +} + + +variable "yugabyte_light_resources" { + type = bool + description = <<-EOT + Enable light resources reservation for yugabyte instances. + + Useful for a dev cluster when you don't want to overload your kubernetes cluster. 
+ EOT + + default = false +} + + +variable "yugabyte_external_nodes" { + type = list(string) + description = <<-EOT + Fully-qualified domain name of existing yugabyte master nodes outside of the cluster if you are joining an existing pool. + Example: ["0.master.db.dss.example.com", "1.master.db.dss.example.com", "2.master.db.dss.example.com"] + EOT + default = [] +} + + + diff --git a/deploy/infrastructure/dependencies/terraform-commons-dss/variables_internal.tf b/deploy/infrastructure/dependencies/terraform-commons-dss/variables_internal.tf index 0c6b28b3e..d56ca25f1 100644 --- a/deploy/infrastructure/dependencies/terraform-commons-dss/variables_internal.tf +++ b/deploy/infrastructure/dependencies/terraform-commons-dss/variables_internal.tf @@ -29,6 +29,22 @@ variable "crdb_internal_nodes" { description = "List of the IP addresses and related dns for the Cockroach DB nodes" } +variable "yugabyte_internal_masters_nodes" { + type = list(object({ + dns = string + ip = string + })) + description = "List of the IP addresses and related dns for the Yugabyte DB master nodes" +} + +variable "yugabyte_internal_tservers_nodes" { + type = list(object({ + dns = string + ip = string + })) + description = "List of the IP addresses and related dns for the Yugabyte DB tserver nodes" +} + variable "ip_gateway" { type = string description = "IP of the gateway used by the DSS service" diff --git a/deploy/infrastructure/dependencies/terraform-google-kubernetes/cluster.tf b/deploy/infrastructure/dependencies/terraform-google-kubernetes/cluster.tf index cb0416c01..cadca0f96 100644 --- a/deploy/infrastructure/dependencies/terraform-google-kubernetes/cluster.tf +++ b/deploy/infrastructure/dependencies/terraform-google-kubernetes/cluster.tf @@ -52,12 +52,31 @@ resource "google_compute_global_address" "ip_gateway" { # Static IP addresses for CRDB instances resource "google_compute_address" "ip_crdb" { - count = var.node_count + count = var.datastore_type == "cockroachdb" ? 
var.node_count : 0 name = format("%s-ip-crdb%v", var.cluster_name, count.index) region = local.region # Current google terraform provider doesn't allow tags or labels. Description is used to preserve mapping between ips and hostnames. - description = format("%s.%s", count.index, var.crdb_hostname_suffix) + description = format("%s.%s", count.index, var.db_hostname_suffix) +} + +# Static IP addresses for yugabyte instances +resource "google_compute_address" "ip_yugabyte_masters" { + count = var.datastore_type == "yugabyte" ? var.node_count : 0 + name = format("%s-ip-yugabyte-master%v", var.cluster_name, count.index) + region = local.region + + # Current google terraform provider doesn't allow tags or labels. Description is used to preserve mapping between ips and hostnames. + description = format("%s.master.%s", count.index, var.db_hostname_suffix) +} + +resource "google_compute_address" "ip_yugabyte_tservers" { + count = var.datastore_type == "yugabyte" ? var.node_count : 0 + name = format("%s-ip-yugabyte-tserver%v", var.cluster_name, count.index) + region = local.region + + # Current google terraform provider doesn't allow tags or labels. Description is used to preserve mapping between ips and hostnames. + description = format("%s.tserver.%s", count.index, var.db_hostname_suffix) } locals { diff --git a/deploy/infrastructure/dependencies/terraform-google-kubernetes/dns.tf b/deploy/infrastructure/dependencies/terraform-google-kubernetes/dns.tf index 85147bcc8..f35ae6227 100644 --- a/deploy/infrastructure/dependencies/terraform-google-kubernetes/dns.tf +++ b/deploy/infrastructure/dependencies/terraform-google-kubernetes/dns.tf @@ -16,11 +16,31 @@ resource "google_dns_record_set" "gateway" { } resource "google_dns_record_set" "crdb" { - count = var.google_dns_managed_zone_name == "" ? 0 : var.node_count + count = var.google_dns_managed_zone_name == "" || var.datastore_type != "cockroachdb" ? 
0 : var.node_count name = "${google_compute_address.ip_crdb[count.index].description}." # description contains the expected hostname type = "A" ttl = 300 managed_zone = data.google_dns_managed_zone.default[0].name rrdatas = [google_compute_address.ip_crdb[count.index].address] -} \ No newline at end of file +} + +resource "google_dns_record_set" "yugabyte_masters" { + count = var.google_dns_managed_zone_name == "" || var.datastore_type != "yugabyte" ? 0 : var.node_count + name = "${google_compute_address.ip_yugabyte_masters[count.index].description}." # description contains the expected hostname + type = "A" + ttl = 300 + + managed_zone = data.google_dns_managed_zone.default[0].name + rrdatas = [google_compute_address.ip_yugabyte_masters[count.index].address] +} + +resource "google_dns_record_set" "yugabyte_tserver" { + count = var.google_dns_managed_zone_name == "" || var.datastore_type != "yugabyte" ? 0 : var.node_count + name = "${google_compute_address.ip_yugabyte_tservers[count.index].description}." 
# description contains the expected hostname + type = "A" + ttl = 300 + + managed_zone = data.google_dns_managed_zone.default[0].name + rrdatas = [google_compute_address.ip_yugabyte_tservers[count.index].address] +} diff --git a/deploy/infrastructure/dependencies/terraform-google-kubernetes/output.tf b/deploy/infrastructure/dependencies/terraform-google-kubernetes/output.tf index aa2a27cda..b239e572b 100644 --- a/deploy/infrastructure/dependencies/terraform-google-kubernetes/output.tf +++ b/deploy/infrastructure/dependencies/terraform-google-kubernetes/output.tf @@ -2,6 +2,14 @@ output "crdb_addresses" { value = [for a in google_compute_address.ip_crdb[*] : { expected_dns : a.description, address : a.address }] } +output "yugabyte_masters_addresses" { + value = [for a in google_compute_address.ip_yugabyte_masters[*] : { expected_dns : a.description, address : a.address }] +} + +output "yugabyte_tservers_addresses" { + value = [for a in google_compute_address.ip_yugabyte_tservers[*] : { expected_dns : a.description, address : a.address }] +} + output "gateway_address" { value = { expected_dns : google_compute_global_address.ip_gateway.description, @@ -41,3 +49,21 @@ output "crdb_nodes" { } ] } + +output "yugabyte_masters_nodes" { + value = [ + for i in google_compute_address.ip_yugabyte_masters : { + ip = i.address + dns = i.description + } + ] +} + +output "yugabyte_tservers_nodes" { + value = [ + for i in google_compute_address.ip_yugabyte_tservers : { + ip = i.address + dns = i.description + } + ] +} diff --git a/deploy/infrastructure/dependencies/terraform-google-kubernetes/variables.gen.tf b/deploy/infrastructure/dependencies/terraform-google-kubernetes/variables.gen.tf index 07d364c55..aa5654c8b 100644 --- a/deploy/infrastructure/dependencies/terraform-google-kubernetes/variables.gen.tf +++ b/deploy/infrastructure/dependencies/terraform-google-kubernetes/variables.gen.tf @@ -43,17 +43,35 @@ variable "app_hostname" { EOT } -variable "crdb_hostname_suffix" { 
+variable "db_hostname_suffix" { type = string description = <<-EOT - The domain name suffix shared by all of your CockroachDB nodes. - For instance, if your CRDB nodes were addressable at 0.db.example.com, - 1.db.example.com and 2.db.example.com, then the value would be db.example.com. + The domain name suffix shared by all of your databases nodes. + For instance, if your database nodes were addressable at 0.db.example.com, + 1.db.example.com and 2.db.example.com (CockroachDB) or 0.master.db.example.com, 1.tserver.db.example.com (Yugabyte), then the value would be db.example.com. Example: db.example.com EOT } + +variable "datastore_type" { + type = string + description = <<-EOT + Type of datastore used + + Supported technologies: cockroachdb, yugabyte + EOT + + validation { + condition = contains(["cockroachdb", "yugabyte"], var.datastore_type) + error_message = "Supported technologies: cockroachdb, yugabyte" + } + + default = "cockroachdb" +} + + variable "cluster_name" { type = string description = <<-EOT diff --git a/deploy/infrastructure/modules/terraform-aws-dss/README.md b/deploy/infrastructure/modules/terraform-aws-dss/README.md index 9d91288bd..191b3f712 100644 --- a/deploy/infrastructure/modules/terraform-aws-dss/README.md +++ b/deploy/infrastructure/modules/terraform-aws-dss/README.md @@ -1,6 +1,6 @@ # terraform-aws-dss -This terraform module creates a Kubernetes cluster in Amazon Web Services using the Elastic Kubernetes Service (EKS) +This terraform module creates a Kubernetes cluster in Amazon Web Services using the Elastic Kubernetes Service (EKS) and generates the tanka files to deploy a DSS instance. @@ -15,7 +15,7 @@ Download & install the following tools to your workstation: 3. Install provider specific tools: 1. [Amazon Web Services](./README.md#amazon-web-services) - + #### Amazon Web Services 1. Install and initialize [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html#getting-started-install-instructions). 
@@ -25,7 +25,7 @@ Download & install the following tools to your workstation: 1. We recommend to create an AWS_PROFILE using for instance `aws configure --profile aws-interuss-dss` Before running `terraform` commands, run once in your shell: `export AWS_PROFILE=aws-interuss-dss` Other methods are described here: https://registry.terraform.io/providers/hashicorp/aws/latest/docs#authentication-and-configuration - + ### Deployment of the Kubernetes cluster @@ -54,8 +54,13 @@ It contains scripts to operate the cluster and setup the services. 1. Go to the new workspace `/build/workspace/${cluster_context}`. 2. Run `./get-credentials.sh` to login to kubernetes. You can now access the cluster with `kubectl`. -3. Generate the certificates using `./make-certs.sh`. Follow script instructions if you are not initializing the cluster. -4. Deploy the certificates using `./apply-certs.sh`. +3. If using CockroachDB: + 1. Generate the certificates using `./make-certs.sh`. Follow script instructions if you are not initializing the cluster. + 1. Deploy the certificates using `./apply-certs.sh`. +4. If using Yugabyte: + 1. Generate the certificates using `./dss-certs.sh init` + 1. If joining a cluster, check `dss-certs.sh`'s [help](../../../operations/certificates-management/README.md) to add others CA in your pool and share your CA with others pools members. + 1. Deploy the certificates using `./dss-certs.sh apply`. 5. Run `tk apply .` to deploy the services to kubernetes. (This may take up to 30 min) 6. Wait for services to initialize: - On AWS, load balancers and certificates are created by Kubernetes Operators. Therefore, it may take few minutes (~5min) to get the services up and running and generate the certificate. To track this progress, go to the following pages and check that: @@ -68,6 +73,6 @@ It contains scripts to operate the cluster and setup the services. 1. Note that the following operations can't be reverted and all data will be lost. 2. 
To delete all resources, run `tk delete .` in the workspace folder. -3. Make sure that all [load balancers](https://eu-west-1.console.aws.amazon.com/ec2/home#LoadBalancers:) and [target groups](https://eu-west-1.console.aws.amazon.com/ec2/home#TargetGroups:) have been deleted from the AWS region before next step. +3. Make sure that all [load balancers](https://eu-west-1.console.aws.amazon.com/ec2/home#LoadBalancers:) and [target groups](https://eu-west-1.console.aws.amazon.com/ec2/home#TargetGroups:) have been deleted from the AWS region before next step. 4. `terraform destroy` in your infrastructure folder. -5. On the [EBS page](https://eu-west-1.console.aws.amazon.com/ec2/home#Volumes:), make sure to manually clean up the persistent storage. Note that the correct AWS region shall be selected. +5. On the [EBS page](https://eu-west-1.console.aws.amazon.com/ec2/home#Volumes:), make sure to manually clean up the persistent storage. Note that the correct AWS region shall be selected. diff --git a/deploy/infrastructure/modules/terraform-aws-dss/TFVARS.gen.md b/deploy/infrastructure/modules/terraform-aws-dss/TFVARS.gen.md index dfaaf89cc..1ba18ebd7 100644 --- a/deploy/infrastructure/modules/terraform-aws-dss/TFVARS.gen.md +++ b/deploy/infrastructure/modules/terraform-aws-dss/TFVARS.gen.md @@ -56,13 +56,20 @@ Leave empty to disable record creation.

Example: dss.example.com

- crdb_hostname_suffix + db_hostname_suffix string -

The domain name suffix shared by all of your CockroachDB nodes. -For instance, if your CRDB nodes were addressable at 0.db.example.com, -1.db.example.com and 2.db.example.com, then the value would be db.example.com.

+

The domain name suffix shared by all of your database nodes. +For instance, if your database nodes were addressable at 0.db.example.com, +1.db.example.com and 2.db.example.com (CockroachDB) or 0.master.db.example.com, 1.tserver.db.example.com (Yugabyte), then the value would be db.example.com.

Example: db.example.com

+ + + datastore_type + string + "cockroachdb" +

Type of datastore used

+

Supported technologies: cockroachdb, yugabyte

cluster_name @@ -182,6 +189,7 @@ Example:

Set to false if joining an existing pool, true if creating the first DSS instance for a pool. When set true, this can initialize the data directories on your cluster, and prevent you from joining an existing pool.

+

Only used for CockroachDB with Tanka

Example: true

@@ -243,6 +251,41 @@ Example: ["0.db.dss.example.com", "1.db.dss.example.com", &q "default"

Namespace where to deploy Kubernetes resources. Only default is supported at the moment.

Example: default

+ + + yugabyte_cloud + string + "dss" +

Cloud of yugabyte instances, used for partitioning.

+

Should be set to dss unless you're doing advanced partitioning.

+ + + yugabyte_region + string + "uss-1" +

Region of yugabyte instances, used for partitioning.

+

Should be different from other USSes in a cluster.

+ + + yugabyte_zone + string + "zone" +

Zone of yugabyte instances, used for partitioning.

+

Should be set to zone unless you're doing advanced partitioning.

+ + + yugabyte_light_resources + bool + false +

Enable light resources reservation for yugabyte instances.

+

Useful for a dev cluster when you don't want to overload your kubernetes cluster.

+ + + yugabyte_external_nodes + list(string) + [] +

Fully-qualified domain names of existing yugabyte master nodes outside of the cluster if you are joining an existing pool. +Example: ["0.master.db.dss.example.com", "1.master.db.dss.example.com", "2.master.db.dss.example.com"]

\ No newline at end of file diff --git a/deploy/infrastructure/modules/terraform-aws-dss/main.tf b/deploy/infrastructure/modules/terraform-aws-dss/main.tf index effdfae82..4f7257d0c 100644 --- a/deploy/infrastructure/modules/terraform-aws-dss/main.tf +++ b/deploy/infrastructure/modules/terraform-aws-dss/main.tf @@ -3,7 +3,8 @@ module "terraform-aws-kubernetes" { cluster_name = var.cluster_name aws_region = var.aws_region app_hostname = var.app_hostname - crdb_hostname_suffix = var.crdb_hostname_suffix + db_hostname_suffix = var.db_hostname_suffix + datastore_type = var.datastore_type aws_instance_type = var.aws_instance_type aws_route53_zone_id = var.aws_route53_zone_id aws_iam_permissions_boundary = var.aws_iam_permissions_boundary @@ -15,26 +16,34 @@ module "terraform-aws-kubernetes" { module "terraform-commons-dss" { # See variables.tf for variables description. - image = var.image - image_pull_secret = var.image_pull_secret - kubernetes_namespace = var.kubernetes_namespace - kubernetes_storage_class = var.aws_kubernetes_storage_class - app_hostname = var.app_hostname - crdb_image_tag = var.crdb_image_tag - crdb_cluster_name = var.crdb_cluster_name - crdb_hostname_suffix = var.crdb_hostname_suffix - should_init = var.should_init - authorization = var.authorization - crdb_locality = var.crdb_locality - crdb_external_nodes = var.crdb_external_nodes - crdb_internal_nodes = module.terraform-aws-kubernetes.crdb_nodes - ip_gateway = module.terraform-aws-kubernetes.ip_gateway - kubernetes_api_endpoint = module.terraform-aws-kubernetes.kubernetes_api_endpoint - kubernetes_cloud_provider_name = module.terraform-aws-kubernetes.kubernetes_cloud_provider_name - kubernetes_context_name = module.terraform-aws-kubernetes.kubernetes_context_name - kubernetes_get_credentials_cmd = module.terraform-aws-kubernetes.kubernetes_get_credentials_cmd - workload_subnet = module.terraform-aws-kubernetes.workload_subnet - gateway_cert_name = 
module.terraform-aws-kubernetes.app_hostname_cert_arn + image = var.image + image_pull_secret = var.image_pull_secret + kubernetes_namespace = var.kubernetes_namespace + kubernetes_storage_class = var.aws_kubernetes_storage_class + app_hostname = var.app_hostname + crdb_image_tag = var.crdb_image_tag + crdb_cluster_name = var.crdb_cluster_name + db_hostname_suffix = var.db_hostname_suffix + datastore_type = var.datastore_type + should_init = var.should_init + authorization = var.authorization + crdb_locality = var.crdb_locality + crdb_external_nodes = var.crdb_external_nodes + yugabyte_cloud = var.yugabyte_cloud + yugabyte_region = var.yugabyte_region + yugabyte_zone = var.yugabyte_zone + yugabyte_light_resources = var.yugabyte_light_resources + yugabyte_external_nodes = var.yugabyte_external_nodes + crdb_internal_nodes = module.terraform-aws-kubernetes.crdb_nodes + yugabyte_internal_masters_nodes = module.terraform-aws-kubernetes.yugabyte_masters_nodes + yugabyte_internal_tservers_nodes = module.terraform-aws-kubernetes.yugabyte_tservers_nodes + ip_gateway = module.terraform-aws-kubernetes.ip_gateway + kubernetes_api_endpoint = module.terraform-aws-kubernetes.kubernetes_api_endpoint + kubernetes_cloud_provider_name = module.terraform-aws-kubernetes.kubernetes_cloud_provider_name + kubernetes_context_name = module.terraform-aws-kubernetes.kubernetes_context_name + kubernetes_get_credentials_cmd = module.terraform-aws-kubernetes.kubernetes_get_credentials_cmd + workload_subnet = module.terraform-aws-kubernetes.workload_subnet + gateway_cert_name = module.terraform-aws-kubernetes.app_hostname_cert_arn source = "../../dependencies/terraform-commons-dss" } diff --git a/deploy/infrastructure/modules/terraform-aws-dss/output.tf b/deploy/infrastructure/modules/terraform-aws-dss/output.tf index 4ab65ab50..9d282a99f 100644 --- a/deploy/infrastructure/modules/terraform-aws-dss/output.tf +++ b/deploy/infrastructure/modules/terraform-aws-dss/output.tf @@ -2,6 +2,14 @@ output 
"crdb_addresses" { value = module.terraform-aws-kubernetes.crdb_addresses } +output "yugabyte_masters_addresses" { + value = module.terraform-aws-kubernetes.yugabyte_masters_addresses +} + +output "yugabyte_tservers_addresses" { + value = module.terraform-aws-kubernetes.yugabyte_tservers_addresses +} + output "gateway_address" { value = module.terraform-aws-kubernetes.gateway_address } diff --git a/deploy/infrastructure/modules/terraform-aws-dss/terraform.dev.example.tfvars b/deploy/infrastructure/modules/terraform-aws-dss/terraform.dev.example.tfvars index 718ab40bd..7b159f15d 100644 --- a/deploy/infrastructure/modules/terraform-aws-dss/terraform.dev.example.tfvars +++ b/deploy/infrastructure/modules/terraform-aws-dss/terraform.dev.example.tfvars @@ -9,7 +9,7 @@ aws_route53_zone_id = "Z01551234567890123456" # Hostnames app_hostname = "dss.interuss.example.com" -crdb_hostname_suffix = "db.interuss.example.com" +db_hostname_suffix = "db.interuss.example.com" # Kubernetes configuration cluster_name = "dss-dev-ew1" @@ -23,10 +23,19 @@ image = "docker.io/interuss/dss:latest" authorization = { public_key_pem_path = "/test-certs/auth2.pem" } -should_init = true + +# Datastore +datastore_type = "cockroachdb" # CockroachDB crdb_image_tag = "v24.1.3" crdb_cluster_name = "interuss_example" crdb_locality = "interuss_dss-aws-ew1" crdb_external_nodes = [] +should_init = true + +# Yugabyte +yugabyte_region = "aws-uss-1" +yugabyte_zone = "aws-uss-1" +yugabyte_light_resources = false +yugabyte_external_nodes = [] diff --git a/deploy/infrastructure/modules/terraform-aws-dss/variables.gen.tf b/deploy/infrastructure/modules/terraform-aws-dss/variables.gen.tf index d5bcaca07..fd1b51a7f 100644 --- a/deploy/infrastructure/modules/terraform-aws-dss/variables.gen.tf +++ b/deploy/infrastructure/modules/terraform-aws-dss/variables.gen.tf @@ -54,17 +54,35 @@ variable "app_hostname" { EOT } -variable "crdb_hostname_suffix" { +variable "db_hostname_suffix" { type = string description = <<-EOT 
- The domain name suffix shared by all of your CockroachDB nodes. - For instance, if your CRDB nodes were addressable at 0.db.example.com, - 1.db.example.com and 2.db.example.com, then the value would be db.example.com. + The domain name suffix shared by all of your databases nodes. + For instance, if your database nodes were addressable at 0.db.example.com, + 1.db.example.com and 2.db.example.com (CockroachDB) or 0.master.db.example.com, 1.tserver.db.example.com (Yugabyte), then the value would be db.example.com. Example: db.example.com EOT } + +variable "datastore_type" { + type = string + description = <<-EOT + Type of datastore used + + Supported technologies: cockroachdb, yugabyte + EOT + + validation { + condition = contains(["cockroachdb", "yugabyte"], var.datastore_type) + error_message = "Supported technologies: cockroachdb, yugabyte" + } + + default = "cockroachdb" +} + + variable "cluster_name" { type = string description = <<-EOT @@ -226,10 +244,13 @@ variable "should_init" { for a pool. When set true, this can initialize the data directories on your cluster, and prevent you from joining an existing pool. + Only used for CockroachDB with Tanka + Example: `true` EOT } + variable "desired_rid_db_version" { type = string description = <<-EOT @@ -319,3 +340,62 @@ variable "kubernetes_namespace" { } } +variable "yugabyte_cloud" { + type = string + description = <<-EOT + Cloud of yugabyte instances, used for partionning. + + Should be set to dss unless you're doing advanced partitionning. + EOT + + default = "dss" +} + + +variable "yugabyte_region" { + type = string + description = <<-EOT + Region of yugabyte instances, used for partionning. + + Should be different from others USS in a cluster. + EOT + + default = "uss-1" +} + + +variable "yugabyte_zone" { + type = string + description = <<-EOT + Zone of yugabyte instances, used for partionning. + + Should be set to zone unless you're doing advanced partitionning. 
+ EOT + + default = "zone" +} + + +variable "yugabyte_light_resources" { + type = bool + description = <<-EOT + Enable light resources reservation for yugabyte instances. + + Useful for a dev cluster when you don't want to overload your kubernetes cluster. + EOT + + default = false +} + + +variable "yugabyte_external_nodes" { + type = list(string) + description = <<-EOT + Fully-qualified domain name of existing yugabyte master nodes outside of the cluster if you are joining an existing pool. + Example: ["0.master.db.dss.example.com", "1.master.db.dss.example.com", "2.master.db.dss.example.com"] + EOT + default = [] +} + + + diff --git a/deploy/infrastructure/modules/terraform-google-dss/README.md b/deploy/infrastructure/modules/terraform-google-dss/README.md index 62b6c6dbc..5070da4fb 100644 --- a/deploy/infrastructure/modules/terraform-google-dss/README.md +++ b/deploy/infrastructure/modules/terraform-google-dss/README.md @@ -1,6 +1,6 @@ # terraform-google-dss -This terraform module creates a Kubernetes cluster in Google Cloud Engine and generates +This terraform module creates a Kubernetes cluster in Google Cloud Engine and generates the tanka files to deploy a DSS instance. ## Getting started @@ -12,7 +12,7 @@ Download & install the following tools to your workstation: 2. Install tools from [Prerequisites](../../../../build/README.md) 3. Install provider specific tools: 1. [Google Cloud Engine](./README.md#google-cloud-engine) - + #### Google Cloud Engine 1. Install and initialize [Google Cloud CLI](https://cloud.google.com/sdk/docs/install-sdk). @@ -41,7 +41,7 @@ Download & install the following tools to your workstation: 6. In the new directory (ie /deploy/infrastructure/personal/terraform-google-dss-dev), initialize terraform: `terraform init`. 7. Run `terraform plan` to check that the configuration is valid. It will display the resources which will be provisioned. 8. Run `terraform apply` to deploy the cluster. (This operation may take up to 15 min.) -9. 
Configure the DNS resolution to the public ip addresses. DNS entries can be either managed manually or +9. Configure the DNS resolution to the public ip addresses. DNS entries can be either managed manually or handled by terraform depending on the cloud provider. See [DNS](DNS.md) for details. ## Deployment of the DSS services @@ -54,8 +54,13 @@ It contains scripts to operate the cluster and setup the services. 1. Go to the new workspace `/build/workspace/${cluster_context}`. 2. Run `./get-credentials.sh` to login to kubernetes. You can now access the cluster with `kubectl`. -3. Generate the certificates using `./make-certs.sh`. Follow script instructions if you are not initializing the cluster. -4. Deploy the certificates using `./apply-certs.sh`. +3. If using CockroachDB: + 1. Generate the certificates using `./make-certs.sh`. Follow script instructions if you are not initializing the cluster. + 1. Deploy the certificates using `./apply-certs.sh`. +4. If using Yugabyte: + 1. Generate the certificates using `./dss-certs.sh init` + 1. If joining a cluster, check `dss-certs.sh`'s [help](../../../operations/certificates-management/README.md) to add others CA in your pool and share your CA with others pools members. + 1. Deploy the certificates using `./dss-certs.sh apply`. 5. Run `tk apply .` to deploy the services to kubernetes. (This may take up to 30 min) 6. Wait for services to initialize: - On Google Cloud, the highest-latency operation is provisioning of the HTTPS certificate which generally takes 10-45 minutes. To track this progress: @@ -71,4 +76,4 @@ It contains scripts to operate the cluster and setup the services. To delete all resources, run `terraform destroy`. Note that this operation can't be reverted and all data will be lost. 
-For Google Cloud Engine, make sure to manually clean up the persistent storage: https://console.cloud.google.com/compute/disks +For Google Cloud Engine, make sure to manually clean up the persistent storage: https://console.cloud.google.com/compute/disks diff --git a/deploy/infrastructure/modules/terraform-google-dss/TFVARS.gen.md b/deploy/infrastructure/modules/terraform-google-dss/TFVARS.gen.md index 07f85cd79..56cb99bd8 100644 --- a/deploy/infrastructure/modules/terraform-google-dss/TFVARS.gen.md +++ b/deploy/infrastructure/modules/terraform-google-dss/TFVARS.gen.md @@ -52,13 +52,20 @@ Example: n2-standard-4 for production, e2-medium for d

Example: dss.example.com

- crdb_hostname_suffix + db_hostname_suffix string -

The domain name suffix shared by all of your CockroachDB nodes. -For instance, if your CRDB nodes were addressable at 0.db.example.com, -1.db.example.com and 2.db.example.com, then the value would be db.example.com.

+

The domain name suffix shared by all of your database nodes. +For instance, if your database nodes were addressable at 0.db.example.com, +1.db.example.com and 2.db.example.com (CockroachDB) or 0.master.db.example.com, 1.tserver.db.example.com (Yugabyte), then the value would be db.example.com.

Example: db.example.com

+ + + datastore_type + string + "cockroachdb" +

Type of datastore used

+

Supported technologies: cockroachdb, yugabyte

cluster_name @@ -178,6 +185,7 @@ Example:

Set to false if joining an existing pool, true if creating the first DSS instance for a pool. When set true, this can initialize the data directories on your cluster, and prevent you from joining an existing pool.

+

Only used for CockroachDB with Tanka

Example: true

@@ -239,6 +247,41 @@ Example: ["0.db.dss.example.com", "1.db.dss.example.com", &q "default"

Namespace where to deploy Kubernetes resources. Only default is supported at the moment.

Example: default

+ + + yugabyte_cloud + string + "dss" +

Cloud of yugabyte instances, used for partitioning.

+

Should be set to dss unless you're doing advanced partitioning.

+ + + yugabyte_region + string + "uss-1" +

Region of yugabyte instances, used for partitioning.

+

Should be different from other USSes in a cluster.

+ + + yugabyte_zone + string + "zone" +

Zone of yugabyte instances, used for partitioning.

+

Should be set to zone unless you're doing advanced partitioning.

+ + + yugabyte_light_resources + bool + false +

Enable light resources reservation for yugabyte instances.

+

Useful for a dev cluster when you don't want to overload your kubernetes cluster.

+ + + yugabyte_external_nodes + list(string) + [] +

Fully-qualified domain names of existing yugabyte master nodes outside of the cluster if you are joining an existing pool. +Example: ["0.master.db.dss.example.com", "1.master.db.dss.example.com", "2.master.db.dss.example.com"]

\ No newline at end of file diff --git a/deploy/infrastructure/modules/terraform-google-dss/main.tf b/deploy/infrastructure/modules/terraform-google-dss/main.tf index ea98a4dd6..33d370f65 100644 --- a/deploy/infrastructure/modules/terraform-google-dss/main.tf +++ b/deploy/infrastructure/modules/terraform-google-dss/main.tf @@ -4,7 +4,8 @@ module "terraform-google-kubernetes" { cluster_name = var.cluster_name google_zone = var.google_zone app_hostname = var.app_hostname - crdb_hostname_suffix = var.crdb_hostname_suffix + db_hostname_suffix = var.db_hostname_suffix + datastore_type = var.datastore_type google_dns_managed_zone_name = var.google_dns_managed_zone_name google_machine_type = var.google_machine_type node_count = var.node_count @@ -15,25 +16,33 @@ module "terraform-google-kubernetes" { module "terraform-commons-dss" { # See variables.tf for variables description. - image = var.image - kubernetes_namespace = var.kubernetes_namespace - kubernetes_storage_class = var.google_kubernetes_storage_class - app_hostname = var.app_hostname - crdb_image_tag = var.crdb_image_tag - crdb_cluster_name = var.crdb_cluster_name - crdb_hostname_suffix = var.crdb_hostname_suffix - should_init = var.should_init - authorization = var.authorization - crdb_locality = var.crdb_locality - image_pull_secret = var.image_pull_secret - crdb_external_nodes = var.crdb_external_nodes - kubernetes_api_endpoint = module.terraform-google-kubernetes.kubernetes_api_endpoint - crdb_internal_nodes = module.terraform-google-kubernetes.crdb_nodes - ip_gateway = module.terraform-google-kubernetes.ip_gateway - ssl_policy = module.terraform-google-kubernetes.ssl_policy - kubernetes_cloud_provider_name = module.terraform-google-kubernetes.kubernetes_cloud_provider_name - kubernetes_context_name = module.terraform-google-kubernetes.kubernetes_context_name - kubernetes_get_credentials_cmd = module.terraform-google-kubernetes.kubernetes_get_credentials_cmd + image = var.image + kubernetes_namespace = 
var.kubernetes_namespace + kubernetes_storage_class = var.google_kubernetes_storage_class + app_hostname = var.app_hostname + crdb_image_tag = var.crdb_image_tag + crdb_cluster_name = var.crdb_cluster_name + db_hostname_suffix = var.db_hostname_suffix + datastore_type = var.datastore_type + should_init = var.should_init + authorization = var.authorization + crdb_locality = var.crdb_locality + image_pull_secret = var.image_pull_secret + crdb_external_nodes = var.crdb_external_nodes + yugabyte_cloud = var.yugabyte_cloud + yugabyte_region = var.yugabyte_region + yugabyte_zone = var.yugabyte_zone + yugabyte_light_resources = var.yugabyte_light_resources + yugabyte_external_nodes = var.yugabyte_external_nodes + kubernetes_api_endpoint = module.terraform-google-kubernetes.kubernetes_api_endpoint + crdb_internal_nodes = module.terraform-google-kubernetes.crdb_nodes + yugabyte_internal_masters_nodes = module.terraform-google-kubernetes.yugabyte_masters_nodes + yugabyte_internal_tservers_nodes = module.terraform-google-kubernetes.yugabyte_tservers_nodes + ip_gateway = module.terraform-google-kubernetes.ip_gateway + ssl_policy = module.terraform-google-kubernetes.ssl_policy + kubernetes_cloud_provider_name = module.terraform-google-kubernetes.kubernetes_cloud_provider_name + kubernetes_context_name = module.terraform-google-kubernetes.kubernetes_context_name + kubernetes_get_credentials_cmd = module.terraform-google-kubernetes.kubernetes_get_credentials_cmd source = "../../dependencies/terraform-commons-dss" } diff --git a/deploy/infrastructure/modules/terraform-google-dss/output.tf b/deploy/infrastructure/modules/terraform-google-dss/output.tf index 7be13880f..0f1486a3d 100644 --- a/deploy/infrastructure/modules/terraform-google-dss/output.tf +++ b/deploy/infrastructure/modules/terraform-google-dss/output.tf @@ -2,6 +2,14 @@ output "crdb_addresses" { value = module.terraform-google-kubernetes.crdb_addresses } +output "yugabyte_masters_addresses" { + value = 
module.terraform-google-kubernetes.yugabyte_masters_addresses +} + +output "yugabyte_tservers_addresses" { + value = module.terraform-google-kubernetes.yugabyte_tservers_addresses +} + output "gateway_address" { value = module.terraform-google-kubernetes.gateway_address } diff --git a/deploy/infrastructure/modules/terraform-google-dss/terraform.dev.example.tfvars b/deploy/infrastructure/modules/terraform-google-dss/terraform.dev.example.tfvars index c5808f9cd..3aa7ca011 100644 --- a/deploy/infrastructure/modules/terraform-google-dss/terraform.dev.example.tfvars +++ b/deploy/infrastructure/modules/terraform-google-dss/terraform.dev.example.tfvars @@ -9,7 +9,7 @@ google_zone = "europe-west6-a" # DNS google_dns_managed_zone_name = "interuss-example-com" app_hostname = "dss.interuss.example.com" -crdb_hostname_suffix = "db.interuss.example.com" +db_hostname_suffix = "db.interuss.example.com" # Kubernetes configuration cluster_name = "dss-dev-w6a" @@ -24,10 +24,19 @@ image_pull_secret = "" authorization = { public_key_pem_path = "/test-certs/auth2.pem" } -should_init = true + +# Datastore +datastore_type = "cockroachdb" # CockroachDB crdb_image_tag = "v24.1.3" crdb_cluster_name = "interuss_example" crdb_locality = "interuss_dss-dev-w6a" crdb_external_nodes = [] +should_init = true + +# Yugabyte +yugabyte_region = "gcp-uss-1" +yugabyte_zone = "gcp-uss-1" +yugabyte_light_resources = false +yugabyte_external_nodes = [] diff --git a/deploy/infrastructure/modules/terraform-google-dss/variables.gen.tf b/deploy/infrastructure/modules/terraform-google-dss/variables.gen.tf index 52337cb51..7dbbff280 100644 --- a/deploy/infrastructure/modules/terraform-google-dss/variables.gen.tf +++ b/deploy/infrastructure/modules/terraform-google-dss/variables.gen.tf @@ -43,17 +43,35 @@ variable "app_hostname" { EOT } -variable "crdb_hostname_suffix" { +variable "db_hostname_suffix" { type = string description = <<-EOT - The domain name suffix shared by all of your CockroachDB nodes. 
- For instance, if your CRDB nodes were addressable at 0.db.example.com, - 1.db.example.com and 2.db.example.com, then the value would be db.example.com. + The domain name suffix shared by all of your databases nodes. + For instance, if your database nodes were addressable at 0.db.example.com, + 1.db.example.com and 2.db.example.com (CockroachDB) or 0.master.db.example.com, 1.tserver.db.example.com (Yugabyte), then the value would be db.example.com. Example: db.example.com EOT } + +variable "datastore_type" { + type = string + description = <<-EOT + Type of datastore used + + Supported technologies: cockroachdb, yugabyte + EOT + + validation { + condition = contains(["cockroachdb", "yugabyte"], var.datastore_type) + error_message = "Supported technologies: cockroachdb, yugabyte" + } + + default = "cockroachdb" +} + + variable "cluster_name" { type = string description = <<-EOT @@ -215,10 +233,13 @@ variable "should_init" { for a pool. When set true, this can initialize the data directories on your cluster, and prevent you from joining an existing pool. + Only used for CockroachDB with Tanka + Example: `true` EOT } + variable "desired_rid_db_version" { type = string description = <<-EOT @@ -308,3 +329,62 @@ variable "kubernetes_namespace" { } } +variable "yugabyte_cloud" { + type = string + description = <<-EOT + Cloud of yugabyte instances, used for partionning. + + Should be set to dss unless you're doing advanced partitionning. + EOT + + default = "dss" +} + + +variable "yugabyte_region" { + type = string + description = <<-EOT + Region of yugabyte instances, used for partionning. + + Should be different from others USS in a cluster. + EOT + + default = "uss-1" +} + + +variable "yugabyte_zone" { + type = string + description = <<-EOT + Zone of yugabyte instances, used for partionning. + + Should be set to zone unless you're doing advanced partitionning. 
+ EOT + + default = "zone" +} + + +variable "yugabyte_light_resources" { + type = bool + description = <<-EOT + Enable light resources reservation for yugabyte instances. + + Useful for a dev cluster when you don't want to overload your kubernetes cluster. + EOT + + default = false +} + + +variable "yugabyte_external_nodes" { + type = list(string) + description = <<-EOT + Fully-qualified domain name of existing yugabyte master nodes outside of the cluster if you are joining an existing pool. + Example: ["0.master.db.dss.example.com", "1.master.db.dss.example.com", "2.master.db.dss.example.com"] + EOT + default = [] +} + + + diff --git a/deploy/infrastructure/utils/definitions/crdb_hostname_suffix.tf b/deploy/infrastructure/utils/definitions/crdb_hostname_suffix.tf deleted file mode 100644 index 005d0d86f..000000000 --- a/deploy/infrastructure/utils/definitions/crdb_hostname_suffix.tf +++ /dev/null @@ -1,10 +0,0 @@ -variable "crdb_hostname_suffix" { - type = string - description = <<-EOT - The domain name suffix shared by all of your CockroachDB nodes. - For instance, if your CRDB nodes were addressable at 0.db.example.com, - 1.db.example.com and 2.db.example.com, then the value would be db.example.com. 
- - Example: db.example.com - EOT -} \ No newline at end of file diff --git a/deploy/infrastructure/utils/definitions/datastore_type.tf b/deploy/infrastructure/utils/definitions/datastore_type.tf new file mode 100644 index 000000000..b63a17237 --- /dev/null +++ b/deploy/infrastructure/utils/definitions/datastore_type.tf @@ -0,0 +1,15 @@ +variable "datastore_type" { + type = string + description = <<-EOT + Type of datastore used + + Supported technologies: cockroachdb, yugabyte + EOT + + validation { + condition = contains(["cockroachdb", "yugabyte"], var.datastore_type) + error_message = "Supported technologies: cockroachdb, yugabyte" + } + + default = "cockroachdb" +} diff --git a/deploy/infrastructure/utils/definitions/db_hostname_suffix.tf b/deploy/infrastructure/utils/definitions/db_hostname_suffix.tf new file mode 100644 index 000000000..7770b8609 --- /dev/null +++ b/deploy/infrastructure/utils/definitions/db_hostname_suffix.tf @@ -0,0 +1,10 @@ +variable "db_hostname_suffix" { + type = string + description = <<-EOT + The domain name suffix shared by all of your databases nodes. + For instance, if your database nodes were addressable at 0.db.example.com, + 1.db.example.com and 2.db.example.com (CockroachDB) or 0.master.db.example.com, 1.tserver.db.example.com (Yugabyte), then the value would be db.example.com. + + Example: db.example.com + EOT +} diff --git a/deploy/infrastructure/utils/definitions/should_init.tf b/deploy/infrastructure/utils/definitions/should_init.tf index bdbf936df..e61d74259 100644 --- a/deploy/infrastructure/utils/definitions/should_init.tf +++ b/deploy/infrastructure/utils/definitions/should_init.tf @@ -5,6 +5,8 @@ variable "should_init" { for a pool. When set true, this can initialize the data directories on your cluster, and prevent you from joining an existing pool. 
+ Only used for CockroachDB with Tanka + Example: `true` EOT -} \ No newline at end of file +} diff --git a/deploy/infrastructure/utils/definitions/yugabyte_cloud.tf b/deploy/infrastructure/utils/definitions/yugabyte_cloud.tf new file mode 100644 index 000000000..51ef755d6 --- /dev/null +++ b/deploy/infrastructure/utils/definitions/yugabyte_cloud.tf @@ -0,0 +1,10 @@ +variable "yugabyte_cloud" { + type = string + description = <<-EOT + Cloud of yugabyte instances, used for partionning. + + Should be set to dss unless you're doing advanced partitionning. + EOT + + default = "dss" +} diff --git a/deploy/infrastructure/utils/definitions/yugabyte_external_nodes.tf b/deploy/infrastructure/utils/definitions/yugabyte_external_nodes.tf new file mode 100644 index 000000000..fd065346d --- /dev/null +++ b/deploy/infrastructure/utils/definitions/yugabyte_external_nodes.tf @@ -0,0 +1,9 @@ +variable "yugabyte_external_nodes" { + type = list(string) + description = <<-EOT + Fully-qualified domain name of existing yugabyte master nodes outside of the cluster if you are joining an existing pool. + Example: ["0.master.db.dss.example.com", "1.master.db.dss.example.com", "2.master.db.dss.example.com"] + EOT + default = [] +} + diff --git a/deploy/infrastructure/utils/definitions/yugabyte_light_resources.tf b/deploy/infrastructure/utils/definitions/yugabyte_light_resources.tf new file mode 100644 index 000000000..f776f7a79 --- /dev/null +++ b/deploy/infrastructure/utils/definitions/yugabyte_light_resources.tf @@ -0,0 +1,10 @@ +variable "yugabyte_light_resources" { + type = bool + description = <<-EOT + Enable light resources reservation for yugabyte instances. + + Useful for a dev cluster when you don't want to overload your kubernetes cluster. 
+ EOT + + default = false +} diff --git a/deploy/infrastructure/utils/definitions/yugabyte_region.tf b/deploy/infrastructure/utils/definitions/yugabyte_region.tf new file mode 100644 index 000000000..17613b9d1 --- /dev/null +++ b/deploy/infrastructure/utils/definitions/yugabyte_region.tf @@ -0,0 +1,10 @@ +variable "yugabyte_region" { + type = string + description = <<-EOT + Region of yugabyte instances, used for partionning. + + Should be different from others USS in a cluster. + EOT + + default = "uss-1" +} diff --git a/deploy/infrastructure/utils/definitions/yugabyte_zone.tf b/deploy/infrastructure/utils/definitions/yugabyte_zone.tf new file mode 100644 index 000000000..735133f14 --- /dev/null +++ b/deploy/infrastructure/utils/definitions/yugabyte_zone.tf @@ -0,0 +1,10 @@ +variable "yugabyte_zone" { + type = string + description = <<-EOT + Zone of yugabyte instances, used for partionning. + + Should be set to zone unless you're doing advanced partitionning. + EOT + + default = "zone" +} diff --git a/deploy/infrastructure/utils/variables.py b/deploy/infrastructure/utils/variables.py index 779720041..515137d10 100755 --- a/deploy/infrastructure/utils/variables.py +++ b/deploy/infrastructure/utils/variables.py @@ -23,7 +23,7 @@ # Variables per project # For all */terraform-* -GLOBAL_VARIABLES = ["app_hostname", "crdb_hostname_suffix"] +GLOBAL_VARIABLES = ["app_hostname", "db_hostname_suffix", "datastore_type"] # dependencies/terraform-commons-dss COMMONS_DSS_VARIABLES = GLOBAL_VARIABLES + [ @@ -39,6 +39,11 @@ "crdb_locality", "crdb_external_nodes", "kubernetes_namespace", + "yugabyte_cloud", + "yugabyte_region", + "yugabyte_zone", + "yugabyte_light_resources", + "yugabyte_external_nodes", ] # dependencies/terraform-*-kubernetes diff --git a/deploy/operations/certificates-management/README.md b/deploy/operations/certificates-management/README.md new file mode 100644 index 000000000..983698c1e --- /dev/null +++ b/deploy/operations/certificates-management/README.md @@ 
-0,0 +1,131 @@ +# Certificates management + +## Introduction + +The `dss-certs.py` script helps you manage the set of certificates used for your DSS deployment. + +Should this DSS be part of a pool, the script also provides some helpers to manage the set of CA certificates in the pool. + +To run the script, just run `./dss-certs.py`. The python script doesn't require any dependencies, just a recent version of python 3. + +## Quick start guide + +### Simple local cluster in minikube + +* `./dss-certs.py --name test --cluster-context dss-local-cluster --namespace default init` +* `./dss-certs.py --name test --cluster-context dss-local-cluster --namespace default apply` + +### Simple pool of 3 local clusters in minikube, in namespaces `default`, `ns2` and `ns3` + +* Create the 3 clusters' certificates +* `./dss-certs.py --name localpool --cluster-context dss-local-cluster --namespace default init` +* `./dss-certs.py --name localpool2 --cluster-context dss-local-cluster --namespace ns2 init` +* `./dss-certs.py --name localpool3 --cluster-context dss-local-cluster --namespace ns3 init` +* Copy cluster 2's and cluster 3's CA to the base cluster +* `./dss-certs.py --name localpool2 --cluster-context dss-local-cluster --namespace ns2 get-ca | ./dss-certs.py --name localpool --cluster-context dss-local-cluster --namespace default add-pool-ca` +* `./dss-certs.py --name localpool3 --cluster-context dss-local-cluster --namespace ns3 get-ca | ./dss-certs.py --name localpool --cluster-context dss-local-cluster --namespace default add-pool-ca` +* Copy the base cluster's CA pool to cluster 2's and cluster 3's CA pools +* `./dss-certs.py --name localpool --cluster-context dss-local-cluster --namespace default get-pool-ca | ./dss-certs.py --name localpool2 --cluster-context dss-local-cluster --namespace ns2 add-pool-ca` +* `./dss-certs.py --name localpool --cluster-context dss-local-cluster --namespace default get-pool-ca | ./dss-certs.py --name localpool3 --cluster-context dss-local-cluster --namespace 
ns3 add-pool-ca` +* Apply the certificates in the respective clusters +* `./dss-certs.py --name localpool --cluster-context dss-local-cluster --namespace default apply` +* `./dss-certs.py --name localpool2 --cluster-context dss-local-cluster --namespace ns2 apply` +* `./dss-certs.py --name localpool3 --cluster-context dss-local-cluster --namespace ns3 apply` + +## Operations + +### Common parameters + +#### `--name` + +The name of your cluster, which should identify it in a unique way. Used as the main identifier for the set of certificates and in certificates, but may be different inside a DSS pool. + +Example: `dss-west-1` + +#### `--organization` + +The name of your organization. Used in certificates generation. The combination of (name, organization) shall be unique in a cluster. + +Example: `interuss` + +#### `--cluster-context` + +The kubernetes context the script should use. + +Example: `dss-local-cluster` + +#### `--namespace` + +The kubernetes namespace to use. + +Example: `default` + +#### `--nodes-count` + +The number of yugabyte nodes you have. Defaults to `3`. + +### `init` + +Create a new set of certificates, with a CA, a client certificate and a certificate for each yugabyte node. + +### `apply` + +Apply the current set of certificates to the kubernetes cluster. Shall be run after each modification of the certificates, such as the addition / removal of a CA in the pool or a new `nodes-count` parameter. + +### `regenerate-nodes` + +Generate missing node certificates. Useful if you want to add new nodes in your cluster. Don't forget to set the `nodes-count` parameter. + +### `add-pool-ca` + +Add the CA certificate(s) of other USSs to the pool of trusted certificates. +Existing certificates are not added again, so you may simply use the output of `get-pool-ca` from another USS. + +You can set the file with certificate(s) with `--ca-file` or use stdin. + +Don't forget to use the `apply` command to update the certificates on your kubernetes cluster. 
+ +Examples: + +* `./dss-certs.py --name test --cluster-context dss-local-cluster --namespace default add-pool-ca < /tmp/new-dss-ca` +* `./dss-certs.py --name test --cluster-context dss-local-cluster --namespace default --ca-file /tmp/new-dss-ca add-pool-ca` +* `./dss-certs.py --name test --cluster-context dss-local-cluster --namespace default get-pool-ca | ./dss-certs.py --name test2 --cluster-context dss-local-cluster --namespace namespace2 add-pool-ca` + +### `remove-pool-ca` + +Remove the CA certificate(s) of other USSs from the pool of trusted certificates. +Certificates that are not present in the pool are skipped. + +You can set the file with certificate(s) with `--ca-file`, use stdin, or use `--ca-serial` to specify the serial / name of the certificate you want to remove. + +Don't forget to use the `apply` command to update the certificates on your kubernetes cluster. + +Examples: + +* `./dss-certs.py --name test --cluster-context dss-local-cluster --namespace default remove-pool-ca < /tmp/old-dss-ca` +* `./dss-certs.py --name test --cluster-context dss-local-cluster --namespace default --ca-file /tmp/old-dss-ca remove-pool-ca` +* `./dss-certs.py --name test --cluster-context dss-local-cluster --namespace default remove-pool-ca --ca-serial="SN=830ECFB0, O=generic-dss-organization, CN=CA.test"` +* `./dss-certs.py --name test --cluster-context dss-local-cluster --namespace default remove-pool-ca --ca-serial="830ECFB0"` +* `./dss-certs.py --name test --cluster-context dss-local-cluster --namespace default remove-pool-ca --ca-serial="46548B7CC9699A7CFA54FF8FA85A619E830ECFB0"` + +### `list-pool-ca` + +List the current CA certificates in the CA pool. + +Also displays a 'hash' of the CA serials, which you may use to easily compare the CA pool certificate lists of other USSs. + +### `get-pool-ca` + +Return all CA certificates in the current pool. + +Can be used for debugging or to synchronize the set of CA certificates in a pool with other USSs. + +### `get-ca` + +Return your own CA certificate. 
+ +Can be used for debugging or to synchronize the set of CA certificates in a pool with others USS. + +### `destroy` + +Destroy a certificate set. Be careful, there are no way to undo the command. diff --git a/deploy/operations/certificates-management/__init__.py b/deploy/operations/certificates-management/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/deploy/operations/certificates-management/apply.py b/deploy/operations/certificates-management/apply.py new file mode 100644 index 000000000..ff056da0c --- /dev/null +++ b/deploy/operations/certificates-management/apply.py @@ -0,0 +1,52 @@ +import subprocess +import os + +import logging +l = logging.getLogger(__name__) + +def do_apply(cluster): + + l.debug("Applying kubernetes configuration") + + l.debug(f"Creating namespace {cluster.namespace}") + + try: + subprocess.check_call( + ["kubectl", "create", "namespace", cluster.namespace, "--context", cluster.cluster_context], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + l.info(f"Created namespace {cluster.namespace}") + + except subprocess.CalledProcessError: # We do assume everything else works + l.debug(f"Namespace {cluster.namespace} already exists") + + for secret_name in ["yb-master-yugabyte-tls-cert", "yb-tserver-yugabyte-tls-cert", "yugabyte-tls-client-cert", "dss.public.certs"]: + + try: + subprocess.check_call( + ["kubectl", "delete", "secret", secret_name, "--namespace", cluster.namespace, "--context", cluster.cluster_context], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + l.info(f"Deleted old secret '{secret_name}'") + + except subprocess.CalledProcessError: # We do assume everything else works + l.debug(f"Secret '{secret_name}' not present on the cluster") + + for secret_name, folder in [ + ("yb-master-yugabyte-tls-cert", cluster.master_certs_dir), + ("yb-tserver-yugabyte-tls-cert", cluster.tserver_certs_dir), + ("yugabyte-tls-client-cert", cluster.client_certs_dir), + ("dss.public.certs", 
os.path.join("..", "..", "..", "build", "jwt-public-certs")), + ]: + + subprocess.check_call( + ["kubectl", "create", "secret", "generic", secret_name, "--namespace", cluster.namespace, "--context", cluster.cluster_context, "--from-file", folder], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + l.info(f"Created secret '{secret_name}'") diff --git a/deploy/operations/certificates-management/ca_pool.py b/deploy/operations/certificates-management/ca_pool.py new file mode 100644 index 000000000..6a770d8be --- /dev/null +++ b/deploy/operations/certificates-management/ca_pool.py @@ -0,0 +1,144 @@ +import base64 +import hashlib +import logging +import os +import re +import shutil +import tempfile + +from utils import get_cert_display_name, get_cert_serial + +l = logging.getLogger(__name__) + + +def build_pool_hash(cluster): + + CAs = [] + for f in os.listdir(cluster.ca_pool_dir): + + if f.endswith(".crt") and f != "ca.crt": + CAs.append(f.lower()) + + CAs = sorted(CAs) + + h = hashlib.sha256() + h.update((",".join(CAs)).encode("utf-8")) + + hashed = base64.b64encode(h.digest(), b"Aa").decode("utf-8") + + return f"{hashed[:5]}-{hashed[-10:-5]}" + + +def add_cas(cluster, certificate): + + folder = cluster.ca_pool_dir + + l.debug("Getting new CA metadata") + + with tempfile.NamedTemporaryFile(delete_on_close=False) as tf: + tf.write(certificate.encode("utf-8")) + tf.close() + + serial = get_cert_serial(tf.name) + name = get_cert_display_name(tf.name) + + filename = f"{serial}.crt" + + target_file = os.path.join(folder, filename) + + if os.path.exists(target_file): + l.info(f"CA {name} already present in the pool") + return + + l.info(f"Adding CA {name} in the pool") + + with open(target_file, "w") as f: + f.write(certificate) + + +def regenerate_ca_files(cluster): + + l.debug("Regenerating CA files from all CA in the pool") + + CAs = [] + for filename in os.listdir(cluster.ca_pool_dir): + + if filename.endswith(".crt") and filename != "ca.crt": + with 
open(os.path.join(cluster.ca_pool_dir, filename), "r") as f: + CAs.append(f.read()) + + CAs = sorted(CAs) + + with open(cluster.ca_pool_ca, "w") as f: + f.write("\n\n".join(CAs)) + + shutil.copy(cluster.ca_pool_ca, cluster.client_ca) + + for node_type in ["master", "tserver"]: + shutil.copy(cluster.ca_pool_ca, getattr(cluster, f"{node_type}_ca")) + + h = build_pool_hash(cluster) + + l.info(f"Regenerated CA files from the CA pool. Current pool hash: {h}") + + +def do_add_cas(cluster, certificates): + pattern = re.compile( + r"-----BEGIN CERTIFICATE-----\s*.+?\s*-----END CERTIFICATE-----", re.DOTALL + ) + for cert in pattern.findall(certificates): + add_cas(cluster, cert) + + regenerate_ca_files(cluster) + + +def do_remove_cas(cluster, certificates_or_serial): + pattern = re.compile( + r"-----BEGIN CERTIFICATE-----\s*.+?\s*-----END CERTIFICATE-----", re.DOTALL + ) + for cert in pattern.findall(certificates_or_serial): + with tempfile.NamedTemporaryFile(delete_on_close=False) as tf: + tf.write(cert.encode("utf-8")) + tf.close() + serial = get_cert_serial(tf.name) + name = get_cert_display_name(tf.name) + + filename = f"{serial}.crt" + + target = os.path.join(cluster.ca_pool_dir, filename) + + if os.path.isfile(target): + os.unlink(target) + l.info(f"Removed certificate {name}") + else: + l.info(f"Certificate {name} not present in pool") + + for filename in sorted(os.listdir(cluster.ca_pool_dir)): + if filename.endswith(".crt") and filename != "ca.crt": + + serial = get_cert_serial(os.path.join(cluster.ca_pool_dir, filename)) + name = get_cert_display_name(os.path.join(cluster.ca_pool_dir, filename)) + + if certificates_or_serial == name or certificates_or_serial == serial or f"SN={certificates_or_serial}, " in name or name.startswith(certificates_or_serial): + os.unlink(os.path.join(cluster.ca_pool_dir, filename)) + l.info(f"Removed certificate {name}") + + regenerate_ca_files(cluster) + +def do_get_ca(cluster): + with open(cluster.ca_cert_file, "r") as f: + 
print(f.read()) + +def do_get_pool_ca(cluster): + with open(cluster.ca_pool_ca, "r") as f: + print(f.read()) + +def do_list_pool_ca(cluster): + + h = build_pool_hash(cluster) + + print(f"Current CA pool hash: {h}") + + for filename in sorted(os.listdir(cluster.ca_pool_dir)): + if filename.endswith(".crt") and filename != "ca.crt": + print(get_cert_display_name(os.path.join(cluster.ca_pool_dir, filename))) diff --git a/deploy/operations/certificates-management/cluster.py b/deploy/operations/certificates-management/cluster.py new file mode 100644 index 000000000..c65d376be --- /dev/null +++ b/deploy/operations/certificates-management/cluster.py @@ -0,0 +1,151 @@ +import os + +from utils import slugify + + +class Cluster(object): + """Represent an instance of a cluster, expose paths""" + + def __init__(self, name, cluster_context, namespace, organization, nodes_count, nodes_public_address): + self._name = name + self.cluster_context = cluster_context + self.namespace = namespace + self.organization = organization + self.nodes_count = nodes_count + self.nodes_public_address = nodes_public_address + + @property + def name(self): + return slugify(self._name) + + @property + def directory(self): + # Replace characters breaking folder names + def remove_special_chars(s: str): + for c in [":", "/"]: + s = s.replace(c, "_") + return s + + return os.path.join(os.getcwd(), "workspace", remove_special_chars(self._name)) + + @property + def ca_key_dir(self): + return os.path.join(self.directory, "ca") + + @property + def ca_key_file(self): + return os.path.join(self.ca_key_dir, "ca.key") + + @property + def ca_cert_file(self): + return os.path.join(self.ca_key_dir, "ca.crt") + + @property + def ca_conf(self): + return os.path.join(self.ca_key_dir, "ca.conf") + + @property + def client_certs_dir(self): + return os.path.join(self.directory, "clients") + + @property + def client_ca(self): + return os.path.join(self.client_certs_dir, "root.crt") + + @property + def 
master_certs_dir(self): + return os.path.join(self.directory, "masters") + + @property + def master_ca(self): + return os.path.join(self.master_certs_dir, "ca.crt") + + @property + def tserver_certs_dir(self): + return os.path.join(self.directory, "tservers") + + @property + def tserver_ca(self): + return os.path.join(self.tserver_certs_dir, "ca.crt") + + @property + def ca_pool_dir(self): + return os.path.join(self.directory, "ca_pool") + + @property + def ca_pool_ca(self): + return os.path.join(self.ca_pool_dir, "ca.crt") + + @property + def is_ready(self): + return os.path.exists(self.ca_key_file) + + @property + def clients(self): + return ["yugabytedb"] # TODO: Do we need more, like a specifc one for the DSS? + + def get_client_cert_file(self, client): + return f"{self.client_certs_dir}/{client}.crt" + + def get_client_key_file(self, client): + return f"{self.client_certs_dir}/{client}.key" + + def get_client_csr_file(self, client): + return f"{self.ca_key_dir}/client.{client}.csr" + + def get_client_conf_file(self, client): + return f"{self.ca_key_dir}/client.{client}.conf" + + def is_client_ready(self, client): + return os.path.exists(self.get_client_cert_file(client)) + + def get_node_short_name(self, node_type, node_id): + return f"yb-{node_type}-{node_id}" + + def get_node_short_name_group(self, node_type, node_id): + short_name = self.get_node_short_name(node_type, node_id) + return f"{short_name}.yb-{node_type}s" + + def get_node_full_name(self, node_type, node_id): + short_name_group = self.get_node_short_name_group(node_type, node_id) + return f"{short_name_group}.{self.namespace}.svc.cluster.local" + + def get_node_full_name_without_group(self, node_type, node_id): + short_name = self.get_node_short_name(node_type, node_id) + return f"{short_name}.{self.namespace}.svc.cluster.local" + + def get_node_public_address(self, node_type, node_id): + return self.nodes_public_address.replace("", str(node_id)).replace("", node_type) + + def 
get_node_cert_file(self, node_type, node_id): + folder = getattr(self, f"{node_type}_certs_dir") + full_name = self.get_node_full_name(node_type, node_id) + return f"{folder}/node.{full_name}.crt" + + def get_node_key_file(self, node_type, node_id): + folder = getattr(self, f"{node_type}_certs_dir") + full_name = self.get_node_full_name(node_type, node_id) + return f"{folder}/node.{full_name}.key" + + def get_node_cert_second_file(self, node_type, node_id): + folder = getattr(self, f"{node_type}_certs_dir") + address = self.get_node_public_address(node_type, node_id) + if address: + return f"{folder}/node.{address}.crt" + + def get_node_key_second_file(self, node_type, node_id): + folder = getattr(self, f"{node_type}_certs_dir") + address = self.get_node_public_address(node_type, node_id) + if address: + return f"{folder}/node.{address}.key" + + def get_node_csr_file(self, node_type, node_id): + full_name = self.get_node_full_name(node_type, node_id) + return f"{self.ca_key_dir}/node.{full_name}.csr" + + def get_node_conf_file(self, node_type, node_id): + full_name = self.get_node_full_name(node_type, node_id) + return f"{self.ca_key_dir}/node.{full_name}.conf" + + def is_node_ready(self, node_type, node_id): + return os.path.exists(self.get_node_cert_file(node_type, node_id)) diff --git a/deploy/operations/certificates-management/dss-certs.py b/deploy/operations/certificates-management/dss-certs.py new file mode 100755 index 000000000..0cfbe821e --- /dev/null +++ b/deploy/operations/certificates-management/dss-certs.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 + +import argparse +import logging +import shutil +import sys + +from apply import do_apply +from cluster import Cluster +from init import do_init +from nodes import do_generate_nodes +from ca_pool import do_get_pool_ca, do_get_ca, do_add_cas, do_list_pool_ca, do_remove_cas + +l = logging.getLogger(__name__) + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Manage certificates for a 
yugabyte cluster" + ) + parser.add_argument( + "--name", + metavar="NAME", + required=True, + help="name of your cluster, should be unique to identify it", + ) + parser.add_argument( + "--organization", + metavar="ORGANIZATION", + default="generic-dss-organization", + help="name of your origanization", + ) + parser.add_argument( + "--cluster-context", + metavar="CLUSTER_CONTEXT", + required=True, + help="kubernetes cluster context name", + ) + parser.add_argument( + "--namespace", + metavar="NAMESPACE", + required=True, + help="kubernetes cluster namespace you are deploying to.", + ) + parser.add_argument( + "--nodes-count", + metavar="NODES_COUNT", + default="3", + help="Number of yugabyte nodes in the cluster, default to 3", + ) + parser.add_argument( + "--nodes-public-address", + metavar="NODES_PUBLIC_ADDRESS", + default="", + help="Public node address. Use to indicate id of the node (0, 1, ...), for the type (tserver, masters). Example: '..db.interuss.example'", + ) + parser.add_argument( + "--ca-file", + metavar="CA_FILE", + default="-", + help="CA file, for add/remove operation. Set to '-' to use stdin", + ) + parser.add_argument( + "--ca-serial", + metavar="CA_SERIAL", + help="CA serial, for remove operation. 
If set, --ca-file is ignored", + ) + parser.add_argument( + "action", + type=str, + help="action to be run", + choices=[ + "init", + "apply", + "regenerate-nodes", + "add-pool-ca", + "remove-pool-ca", + "list-pool-ca", + "get-pool-ca", + "get-ca", + "destroy", + ], + ) + parser.add_argument( + "--log-level", + type=str, + help="logging level", + default="INFO", + choices=[ + "DEBUG", + "INFO", + "WARNING", + "ERROR", + ], + ) + return parser.parse_args() + + +def main(): + + args = parse_args() + logging.basicConfig( + level=args.log_level, + format="%(asctime)-15s %(funcName)-25s %(levelname)-8s %(message)s", + ) + cluster = Cluster( + args.name, + args.cluster_context, + args.namespace, + args.organization, + args.nodes_count, + args.nodes_public_address, + ) + + def read_input(): + if args.ca_file == "-": + return sys.stdin.read() + + with open(args.ca_file, 'r') as f: + return f.read() + + if args.action == "init": + do_init(cluster) + elif args.action == "regenerate-nodes": + do_generate_nodes(cluster) + elif args.action == "apply": + do_apply(cluster) + elif args.action == "add-pool-ca": + do_add_cas(cluster, read_input()) + elif args.action == "remove-pool-ca": + if args.ca_serial: + do_remove_cas(cluster, args.ca_serial) + else: + do_remove_cas(cluster, read_input()) + elif args.action == "list-pool-ca": + do_list_pool_ca(cluster) + elif args.action == "get-pool-ca": + do_get_pool_ca(cluster) + elif args.action == "get-ca": + do_get_ca(cluster) + elif args.action == "destroy": + if input("Are you sure? You will loose all your certificates! 
[yN]") == "y": + shutil.rmtree(cluster.directory) + l.warning(f"Destroyed cluster certificates") + else: + l.info(f"Cancelled removal") + + +if __name__ == "__main__": + main() diff --git a/deploy/operations/certificates-management/init.py b/deploy/operations/certificates-management/init.py new file mode 100644 index 000000000..675d1baca --- /dev/null +++ b/deploy/operations/certificates-management/init.py @@ -0,0 +1,240 @@ +import logging +import os +import subprocess +import sys + +from ca_pool import do_add_cas +from nodes import do_generate_nodes +from utils import get_cert_display_name + +l = logging.getLogger(__name__) + + +def generate_ca_config(cluster): + l.debug("Creating CA configuration files and database") + + with open(cluster.ca_conf, "w") as f: + f.write( + f""" + [ ca ] + default_ca = my_ca + +[ my_ca ] +default_days = 3650 + +serial = {cluster.ca_key_dir}/serial.txt +database = {cluster.ca_key_dir}/index.txt +default_md = sha256 +policy = my_policy + +[ my_policy ] + +organizationName = supplied +commonName = supplied + +[req] +prompt=no +distinguished_name = my_distinguished_name +x509_extensions = my_extensions + +[ my_distinguished_name ] +organizationName = {cluster.organization} +commonName = CA.{cluster.name} + +[ my_extensions ] +keyUsage = critical,digitalSignature,nonRepudiation,keyEncipherment,keyCertSign +basicConstraints = critical,CA:true,pathlen:1 + +""" + ) + + with open(f"{cluster.ca_key_dir}/serial.txt", "w") as f: + f.write("0001") + + with open(f"{cluster.ca_key_dir}/index.txt", "w") as f: + f.write("") + + l.info("Created CA configuration files and database") + + +def generate_ca_key(cluster): + l.debug("Generating CA private key") + subprocess.check_call( + ["openssl", "genrsa", "-out", cluster.ca_key_file, "4096"], + stdout=subprocess.DEVNULL, + ) + l.info("Generated CA private key") + + +def generate_ca_cert(cluster): + l.debug("Generating CA certificate") + subprocess.check_call( + [ + "openssl", + "req", + "-new", + 
"-x509", + "-days", + "3650", + "-config", + cluster.ca_conf, + "-key", + cluster.ca_key_file, + "-out", + cluster.ca_cert_file, + ], + stdout=subprocess.DEVNULL, + ) + + name = get_cert_display_name(cluster.ca_cert_file) + + l.info(f"Generated CA certificate '{name}'") + + +def generate_ca(cluster): + generate_ca_config(cluster) + generate_ca_key(cluster) + generate_ca_cert(cluster) + + +def make_directories(cluster): + + l.debug("Creating directories") + + if not os.path.exists("workspace"): + os.makedirs("workspace") + + os.mkdir(cluster.directory) + os.mkdir(cluster.ca_key_dir) + os.mkdir(cluster.master_certs_dir) + os.mkdir(cluster.tserver_certs_dir) + os.mkdir(cluster.client_certs_dir) + os.mkdir(cluster.ca_pool_dir) + + l.info("Created directories") + + +def generate_clients(cluster): + + for client in cluster.clients: + if cluster.is_client_ready(client): + l.debug(f"Client '{client}' certificates already generated") + continue + generate_client_config(cluster, client) + generate_client_key(cluster, client) + generate_client_csr(cluster, client) + generate_client_cert(cluster, client) + + +def generate_client_config(cluster, client): + + l.debug(f"Creating client '{client}' configuration file") + + with open(cluster.get_client_conf_file(client), "w") as f: + f.write( + f"""[ req ] +prompt=no +distinguished_name = my_distinguished_name + +[ my_distinguished_name ] +organizationName = {cluster.organization} +commonName = client.{client} +""" + ) + + l.info(f"Created client '{client}' configuration file") + + +def generate_client_key(cluster, client): + + l.debug(f"Generating client '{client}' private key") + + subprocess.check_call( + ["openssl", "genrsa", "-out", cluster.get_client_key_file(client), "4096"] + ) + + l.info(f"Generated client '{client}' private key") + + +def generate_client_csr(cluster, client): + + l.debug(f"Generating client '{client}' certificate request") + + subprocess.check_call( + [ + "openssl", + "req", + "-new", + "-config", + 
cluster.get_client_conf_file(client), + "-key", + cluster.get_client_key_file(client), + "-out", + cluster.get_client_csr_file(client), + ], + stdout=subprocess.DEVNULL, + ) + + l.info(f"Generated client '{client}' certificate request") + + +def generate_client_cert(cluster, client): + + l.debug(f"Generating client '{client}' certificate") + + subprocess.check_call( + [ + "openssl", + "ca", + "-config", + cluster.ca_conf, + "-keyfile", + cluster.ca_key_file, + "-cert", + cluster.ca_cert_file, + "-policy", + "my_policy", + "-out", + cluster.get_client_cert_file(client), + "-outdir", + cluster.client_certs_dir, + "-in", + cluster.get_client_csr_file(client), + "-days", + "3650", + "-batch", + "-extfile", + cluster.get_client_conf_file(client), + ], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + name = get_cert_display_name(cluster.get_client_cert_file(client)) + + l.info(f"Generated client '{client}' certificate '{name}'") + + +def do_init(cluster): + """Initialize a new cluster""" + + l.info("Initialization of a new cluster") + + if cluster.is_ready: + l.error("Cluster is already initialized, unable to continue") + sys.exit(1) + else: + l.debug("Cluster is not already initialized, continuing") + + make_directories(cluster) + generate_ca(cluster) + generate_clients(cluster) + + do_generate_nodes(cluster) + + with open(cluster.ca_cert_file, "r") as f: + do_add_cas(cluster, f.read()) + + l.info( + "The new cluster certificates are ready! Don't forget to 'apply' the configuration." 
+ ) diff --git a/deploy/operations/certificates-management/nodes.py b/deploy/operations/certificates-management/nodes.py new file mode 100644 index 000000000..87799c7e4 --- /dev/null +++ b/deploy/operations/certificates-management/nodes.py @@ -0,0 +1,174 @@ +import logging +import subprocess +import sys +import shutil + +from utils import get_cert_display_name + + +def generate_node_config(cluster, node_type, node_id): + + l.debug(f"Creating {node_type} #{node_id} configuration file") + + short_name = cluster.get_node_short_name(node_type, node_id) + short_name_group = cluster.get_node_short_name_group(node_type, node_id) + full_name = cluster.get_node_full_name(node_type, node_id) + full_name_without_group = cluster.get_node_full_name_without_group( + node_type, node_id + ) + public_address = cluster.get_node_public_address(node_type, node_id) + + with open(cluster.get_node_conf_file(node_type, node_id), "w") as f: + f.write( + f"""[ req ] +prompt=no +distinguished_name = my_distinguished_name + +[ my_distinguished_name ] +organizationName = {cluster.organization} +commonName = {full_name} + +# Multiple subject alternative names (SANs) such as IP Address, +# DNS Name, Email, URI, and so on, can be specified under this section +[ req_ext ] +subjectAltName = @alt_names +[ alt_names ] +DNS.1 = {short_name} +DNS.2 = {full_name} +DNS.3 = {short_name_group} +DNS.4 = {full_name_without_group} +DNS.5 = yb-{node_type}s +DNS.6 = yb-{node_type}s.{cluster.namespace} +DNS.7 = yb-{node_type}s.{cluster.namespace}.svc.cluster.local +""" + ) + + if public_address: + f.write(f"""DNS.8 = {public_address} +""") + + l.info(f"Created {node_type} #{node_id} configuration file") + + +def generate_node_key(cluster, node_type, node_id): + + l.debug(f"Generating {node_type} #{node_id} private key") + + file = cluster.get_node_key_file(node_type, node_id) + + subprocess.check_call( + [ + "openssl", + "genrsa", + "-out", + file, + "4096", + ] + ) + + second_file = 
cluster.get_node_key_second_file(node_type, node_id) + + if second_file: + shutil.copy(file, second_file) + + l.info(f"Generated {node_type} #{node_id} private key") + + +def generate_node_csr(cluster, node_type, node_id): + + l.debug(f"Generating {node_type} #{node_id} certificate request") + + subprocess.check_call( + [ + "openssl", + "req", + "-new", + "-config", + cluster.get_node_conf_file(node_type, node_id), + "-key", + cluster.get_node_key_file(node_type, node_id), + "-out", + cluster.get_node_csr_file(node_type, node_id), + ], + stdout=subprocess.DEVNULL, + ) + + l.info(f"Generated {node_type} #{node_id} certificate request") + + +def generate_node_cert(cluster, node_type, node_id): + + l.debug(f"Generating {node_type} #{node_id} certificate") + + file = cluster.get_node_cert_file(node_type, node_id) + + subprocess.check_call( + [ + "openssl", + "ca", + "-config", + cluster.ca_conf, + "-keyfile", + cluster.ca_key_file, + "-cert", + cluster.ca_cert_file, + "-policy", + "my_policy", + "-out", + file, + "-outdir", + getattr(cluster, f"{node_type}_certs_dir"), + "-in", + cluster.get_node_csr_file(node_type, node_id), + "-days", + "3650", + "-batch", + "-extfile", + cluster.get_node_conf_file(node_type, node_id), + "-extensions", + "req_ext", + ], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + second_file = cluster.get_node_cert_second_file(node_type, node_id) + + if second_file: + shutil.copy(file, second_file) + + name = get_cert_display_name(cluster.get_node_cert_file(node_type, node_id)) + + l.info(f"Generated {node_type} #{node_id} certificate '{name}'") + + +def generate_node(cluster, node_type, node_id): + if cluster.is_node_ready(node_type, node_id): + l.debug(f"{node_type} #{node_id} certificiates already generated") + return + + generate_node_config(cluster, node_type, node_id) + generate_node_key(cluster, node_type, node_id) + generate_node_csr(cluster, node_type, node_id) + generate_node_cert(cluster, node_type, node_id) + + +l = 
logging.getLogger(__name__) + + +def do_generate_nodes(cluster): + """Generate certificates for all nodes (master and tserver)""" + + l.info("Generation of nodes certificates") + + if not cluster.is_ready: + l.error("Cluster is not already initialized, unable to continue") + sys.exit(1) + else: + l.debug("Cluster is initialized, continuing") + + for node_type in ["master", "tserver"]: + for node_id in range(0, int(cluster.nodes_count)): + generate_node(cluster, node_type, node_id) + + l.info("All nodes certificates are ready") diff --git a/deploy/operations/certificates-management/utils.py b/deploy/operations/certificates-management/utils.py new file mode 100644 index 000000000..4947a3d3f --- /dev/null +++ b/deploy/operations/certificates-management/utils.py @@ -0,0 +1,51 @@ +import logging +import re +import ssl +import sys +import unicodedata + +l = logging.getLogger(__name__) + + +def slugify(text): + text = unicodedata.normalize("NFKD", text).encode("ascii", "ignore").decode("ascii") + text = text.lower() + text = re.sub(r"[^a-z0-9_\.]+", "-", text) + text = text.strip("-") + return text + + +def get_cert_display_name(path): + try: + cert_dict = ssl._ssl._test_decode_cert( + path + ) # We do use an internal function, to avoid installing dependencies + except Exception as e: + l.error(e) + sys.exit(1) + + serial = cert_dict.get("serialNumber", "") + + orga = "" + cn = "" + + for kv in cert_dict.get("subject", []): + for k, v in kv: + if k == "organizationName": + orga = v + elif k == "commonName": + cn = v + + return f"SN={serial[-8:]}, O={orga}, CN={cn}" + + +def get_cert_serial(path): + try: + cert_dict = ssl._ssl._test_decode_cert( + path + ) # We do use an internal function, to avoid installing dependencies + except Exception as e: + l.error(e) + sys.exit(1) + + return cert_dict["serialNumber"] diff --git a/deploy/operations/ci/aws-1/main.tf b/deploy/operations/ci/aws-1/main.tf index 998b71eb9..fd238a02b 100644 --- a/deploy/operations/ci/aws-1/main.tf +++ 
b/deploy/operations/ci/aws-1/main.tf @@ -19,7 +19,7 @@ module "terraform-aws-dss" { cluster_name = var.cluster_name crdb_image_tag = var.crdb_image_tag crdb_cluster_name = var.crdb_cluster_name - crdb_hostname_suffix = var.crdb_hostname_suffix + db_hostname_suffix = var.db_hostname_suffix crdb_locality = var.crdb_locality crdb_external_nodes = var.crdb_external_nodes image = var.image diff --git a/deploy/operations/ci/aws-1/terraform.tfvars b/deploy/operations/ci/aws-1/terraform.tfvars index 22be80d06..b9e935286 100644 --- a/deploy/operations/ci/aws-1/terraform.tfvars +++ b/deploy/operations/ci/aws-1/terraform.tfvars @@ -8,7 +8,7 @@ aws_route53_zone_id = "Z03377073HUSGB4L9FKEK" # Hostnames app_hostname = "dss.ci.aws-interuss.uspace.dev" -crdb_hostname_suffix = "db.ci.aws-interuss.uspace.dev" +db_hostname_suffix = "db.ci.aws-interuss.uspace.dev" # Kubernetes configuration kubernetes_version = 1.32 diff --git a/deploy/operations/ci/aws-1/variables.gen.tf b/deploy/operations/ci/aws-1/variables.gen.tf index d5bcaca07..fd1b51a7f 100644 --- a/deploy/operations/ci/aws-1/variables.gen.tf +++ b/deploy/operations/ci/aws-1/variables.gen.tf @@ -54,17 +54,35 @@ variable "app_hostname" { EOT } -variable "crdb_hostname_suffix" { +variable "db_hostname_suffix" { type = string description = <<-EOT - The domain name suffix shared by all of your CockroachDB nodes. - For instance, if your CRDB nodes were addressable at 0.db.example.com, - 1.db.example.com and 2.db.example.com, then the value would be db.example.com. + The domain name suffix shared by all of your databases nodes. + For instance, if your database nodes were addressable at 0.db.example.com, + 1.db.example.com and 2.db.example.com (CockroachDB) or 0.master.db.example.com, 1.tserver.db.example.com (Yugabyte), then the value would be db.example.com. 
Example: db.example.com EOT } + +variable "datastore_type" { + type = string + description = <<-EOT + Type of datastore used + + Supported technologies: cockroachdb, yugabyte + EOT + + validation { + condition = contains(["cockroachdb", "yugabyte"], var.datastore_type) + error_message = "Supported technologies: cockroachdb, yugabyte" + } + + default = "cockroachdb" +} + + variable "cluster_name" { + type = string + description = <<-EOT @@ -226,10 +244,13 @@ variable "should_init" { for a pool. When set true, this can initialize the data directories on your cluster, and prevent you from joining an existing pool. + Only used for CockroachDB with Tanka + Example: `true` EOT } + variable "desired_rid_db_version" { type = string description = <<-EOT @@ -319,3 +340,62 @@ variable "kubernetes_namespace" { } } +variable "yugabyte_cloud" { + type = string + description = <<-EOT + Cloud of yugabyte instances, used for partitioning. + + Should be set to dss unless you're doing advanced partitioning. + EOT + + default = "dss" +} + + +variable "yugabyte_region" { + type = string + description = <<-EOT + Region of yugabyte instances, used for partitioning. + + Should be different from other USSs in a cluster. + EOT + + default = "uss-1" +} + + +variable "yugabyte_zone" { + type = string + description = <<-EOT + Zone of yugabyte instances, used for partitioning. + + Should be set to zone unless you're doing advanced partitioning. + EOT + + default = "zone" +} + + +variable "yugabyte_light_resources" { + type = bool + description = <<-EOT + Enable light resources reservation for yugabyte instances. + + Useful for a dev cluster when you don't want to overload your kubernetes cluster. + EOT + + default = false +} + + +variable "yugabyte_external_nodes" { + type = list(string) + description = <<-EOT + Fully-qualified domain name of existing yugabyte master nodes outside of the cluster if you are joining an existing pool. 
+ Example: ["0.master.db.dss.example.com", "1.master.db.dss.example.com", "2.master.db.dss.example.com"] + EOT + default = [] +} + + + diff --git a/deploy/services/helm-charts/dss/templates/_helpers.tpl b/deploy/services/helm-charts/dss/templates/_helpers.tpl index f2e569003..31ddbcc5c 100644 --- a/deploy/services/helm-charts/dss/templates/_helpers.tpl +++ b/deploy/services/helm-charts/dss/templates/_helpers.tpl @@ -50,6 +50,6 @@ yugabyte {{ if .cockroachdbEnabled }} - "/cockroach/cockroach sql --certs-dir /cockroach/cockroach-certs/ --host {{.datastoreHost}} --port \"{{.datastorePort}}\" --format raw -e \"SELECT * FROM crdb_internal.databases where name = '{{.schemaName}}';\" | grep {{.schemaName}}" {{ else }} - - "ysqlsh --host {{.datastoreHost}} --port \"{{.datastorePort}}\" -c \"SELECT datname FROM pg_database where datname = '{{.schemaName}}';\" | grep {{.schemaName}}" + - "ysqlsh --host {{.datastoreHost}} --port \"{{.datastorePort}}\" \"sslmode=require\" -c \"SELECT datname FROM pg_database where datname = '{{.schemaName}}';\" | grep {{.schemaName}}" {{ end }} {{- end -}} diff --git a/deploy/services/helm-charts/dss/templates/_volumes.tpl b/deploy/services/helm-charts/dss/templates/_volumes.tpl index e21dc30e4..981e5d80c 100644 --- a/deploy/services/helm-charts/dss/templates/_volumes.tpl +++ b/deploy/services/helm-charts/dss/templates/_volumes.tpl @@ -2,27 +2,41 @@ - name: ca-certs secret: defaultMode: 256 - secretName: cockroachdb.ca.crt + secretName: {{ if .cockroachdbEnabled }}cockroachdb.ca.crt{{ else }}yugabyte-tls-client-cert{{ end }} {{- end -}} {{- define "ca-certs:volumeMount" -}} +{{ if .cockroachdbEnabled }} - mountPath: /cockroach/cockroach-certs/ca.crt name: ca-certs subPath: ca.crt +{{ else }} +- mountPath: /opt/yugabyte-certs/ca.crt + name: ca-certs + subPath: root.crt +{{- end -}} {{- end -}} - {{- define "client-certs:volume" -}} - name: client-certs secret: defaultMode: 256 - secretName: cockroachdb.client.root + secretName: {{ if 
.cockroachdbEnabled }}cockroachdb.client.root{{ else }}yugabyte-tls-client-cert{{ end }} {{- end -}} {{- define "client-certs:volumeMount" -}} +{{ if .cockroachdbEnabled }} - mountPath: /cockroach/cockroach-certs/client.root.crt name: client-certs subPath: client.root.crt - mountPath: /cockroach/cockroach-certs/client.root.key name: client-certs subPath: client.root.key +{{ else }} +- mountPath: /opt/yugabyte-certs/client.yugabyte.crt + name: client-certs + subPath: yugabytedb.crt +- mountPath: /opt/yugabyte-certs/client.yugabyte.key + name: client-certs + subPath: yugabytedb.key +{{- end -}} {{- end -}} @@ -35,4 +49,4 @@ {{- define "public-certs:volumeMount" -}} - mountPath: /public-certs name: public-certs -{{- end -}} \ No newline at end of file +{{- end -}} diff --git a/deploy/services/helm-charts/dss/templates/dss-core-service.yaml b/deploy/services/helm-charts/dss/templates/dss-core-service.yaml index d129031ed..6521a0fbf 100644 --- a/deploy/services/helm-charts/dss/templates/dss-core-service.yaml +++ b/deploy/services/helm-charts/dss/templates/dss-core-service.yaml @@ -22,7 +22,7 @@ metadata: name: {{.Release.Name}}-core-service spec: minReadySeconds: 30 - replicas: {{ len .Values.loadBalancers.cockroachdbNodes }} + replicas: {{ if $.Values.cockroachdb.enabled -}}{{ len .Values.loadBalancers.cockroachdbNodes }}{{- else -}}{{ len .Values.loadBalancers.yugabyteMasterNodes }}{{- end }} selector: matchLabels: app: {{.Release.Name}}-core-service @@ -51,6 +51,9 @@ spec: - --cockroach_ssl_dir=/cockroach/cockroach-certs - --cockroach_ssl_mode=verify-full - --locality={{ .Values.cockroachdb.conf.locality }} +{{ else }} + - --cockroach_ssl_dir=/opt/yugabyte-certs + - --cockroach_ssl_mode=verify-full {{ end }} - --dump_requests=true - --enable_scd={{$dss.enableScd | default true}} @@ -80,12 +83,12 @@ spec: stdin: false tty: false volumeMounts: - {{- include "ca-certs:volumeMount" . | nindent 12 }} - {{- include "client-certs:volumeMount" . 
| nindent 12 }} - {{- include "public-certs:volumeMount" . | nindent 12 }} + {{- include "ca-certs:volumeMount" (dict "cockroachdbEnabled" $.Values.cockroachdb.enabled ) | nindent 12 }} + {{- include "client-certs:volumeMount" (dict "cockroachdbEnabled" $.Values.cockroachdb.enabled ) | nindent 12 }} + {{- include "public-certs:volumeMount" (dict "cockroachdbEnabled" $.Values.cockroachdb.enabled ) | nindent 12 }} imagePullSecrets: [] terminationGracePeriodSeconds: 30 volumes: - {{- include "ca-certs:volume" . | nindent 8 }} - {{- include "client-certs:volume" . | nindent 8 }} - {{- include "public-certs:volume" . | nindent 8 }} + {{- include "ca-certs:volume" (dict "cockroachdbEnabled" $.Values.cockroachdb.enabled ) | nindent 8 }} + {{- include "client-certs:volume" (dict "cockroachdbEnabled" $.Values.cockroachdb.enabled ) | nindent 8 }} + {{- include "public-certs:volume" (dict "cockroachdbEnabled" $.Values.cockroachdb.enabled ) | nindent 8 }} diff --git a/deploy/services/helm-charts/dss/templates/schema-manager.yaml b/deploy/services/helm-charts/dss/templates/schema-manager.yaml index fbd9b22a6..f5a232d76 100644 --- a/deploy/services/helm-charts/dss/templates/schema-manager.yaml +++ b/deploy/services/helm-charts/dss/templates/schema-manager.yaml @@ -46,6 +46,8 @@ spec: - --schemas_dir=/db-schemas/{{$service}} {{ else }} - --schemas_dir=/db-schemas/yugabyte/{{$service}} + - --cockroach_ssl_dir=/opt/yugabyte-certs/ + - --cockroach_ssl_mode=verify-full {{ end }} - --db_version={{$schemaVersion}} command: @@ -57,12 +59,12 @@ spec: stdin: false tty: false volumeMounts: - {{- include "ca-certs:volumeMount" . | nindent 12 }} - {{- include "client-certs:volumeMount" . 
| nindent 12 }} + {{- include "ca-certs:volumeMount" (dict "cockroachdbEnabled" $.Values.cockroachdb.enabled ) | nindent 12 }} + {{- include "client-certs:volumeMount" (dict "cockroachdbEnabled" $.Values.cockroachdb.enabled ) | nindent 12 }} imagePullSecrets: [] restartPolicy: OnFailure terminationGracePeriodSeconds: 30 volumes: - {{- include "ca-certs:volume" . | nindent 8 }} - {{- include "client-certs:volume" . | nindent 8 }} + {{- include "ca-certs:volume" (dict "cockroachdbEnabled" $.Values.cockroachdb.enabled ) | nindent 8 }} + {{- include "client-certs:volume" (dict "cockroachdbEnabled" $.Values.cockroachdb.enabled ) | nindent 8 }} {{- end -}} diff --git a/deploy/services/helm-charts/dss/templates/yugabyte-loadbalancers.yaml b/deploy/services/helm-charts/dss/templates/yugabyte-loadbalancers.yaml new file mode 100644 index 000000000..5614ff32d --- /dev/null +++ b/deploy/services/helm-charts/dss/templates/yugabyte-loadbalancers.yaml @@ -0,0 +1,92 @@ +{{- $cloudProvider := $.Values.global.cloudProvider}} + +{{- if $.Values.yugabyte.enabled }} + +# Master nodes Gateways +{{- range $i, $lb := .Values.loadBalancers.yugabyteMasterNodes }} +--- +apiVersion: v1 +kind: Service +metadata: + annotations: + service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" + {{- include (printf "%s-lb-crdb-annotations" $cloudProvider) + (dict + "name" (printf "%s-%s" "yuga-master-ext" ( $i | toString) ) + "ip" $lb.ip + "subnet" $lb.subnet + "cloudProvider" $cloudProvider + ) | nindent 4 + }} + labels: + app: yugabyte + name: yuga-master-ext-{{$i}} + name: yuga-master-ext-{{$i}} +spec: + {{- include (printf "%s-lb-spec" $cloudProvider) (dict "ip" $lb.ip) | nindent 2}} + ports: + - name: yugabyte-master-db-ext-{{$i}} + port: 7100 + targetPort: 7100 + - name: yugabyte-master-ui-ext-{{$i}} + port: 7000 + targetPort: 7000 + - name: yugabyte-master-ui2-ext-{{$i}} + port: 9000 + targetPort: 9000 + publishNotReadyAddresses: true + selector: + statefulset.kubernetes.io/pod-name: 
yb-master-{{$i}} + type: LoadBalancer +{{- end }} + +# Tserver nodes Gateways +{{- range $i, $lb := .Values.loadBalancers.yugabyteTserverNodes }} +--- +apiVersion: v1 +kind: Service +metadata: + annotations: + service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" + {{- include (printf "%s-lb-crdb-annotations" $cloudProvider) + (dict + "name" (printf "%s-%s" "yuga-tserver-ext" ( $i | toString) ) + "ip" $lb.ip + "subnet" $lb.subnet + "cloudProvider" $cloudProvider + ) | nindent 4 + }} + labels: + app: yugabyte + name: yuga-tserver-ext-{{$i}} + name: yuga-tserver-ext-{{$i}} +spec: + {{- include (printf "%s-lb-spec" $cloudProvider) (dict "ip" $lb.ip) | nindent 2}} + ports: + - name: yugabyte-tserver-db-ext-{{$i}} + port: 9100 + targetPort: 9100 + - name: yugabyte-tserver-ui-ext-{{$i}} + port: 9000 + targetPort: 9000 + - name: yugabyte-tserver-ui2-ext-{{$i}} + port: 7000 + targetPort: 7000 + - name: yugabyte-tserver-ycql-ext-{{$i}} + port: 9042 + targetPort: 9042 + - name: yugabyte-tserver-ysql-ext-{{$i}} + port: 5433 + targetPort: 5433 + - name: yugabyte-tserver-metrics-ext-{{$i}} + port: 13000 + targetPort: 13000 + - name: yugabyte-tserver-metrics-2-ext-{{$i}} + port: 12000 + targetPort: 12000 + publishNotReadyAddresses: true + selector: + statefulset.kubernetes.io/pod-name: yb-tserver-{{$i}} + type: LoadBalancer +{{- end }} +{{- end }} diff --git a/deploy/services/helm-charts/dss/values.yaml b/deploy/services/helm-charts/dss/values.yaml index dfb2ed0e2..0d4727efd 100644 --- a/deploy/services/helm-charts/dss/values.yaml +++ b/deploy/services/helm-charts/dss/values.yaml @@ -19,4 +19,27 @@ yugabyte: Image: repository: yugabytedb/yugabyte + isMultiAz: true + # multicluster: + # createServicePerPod: true + + gflags: + master: + placement_cloud: "dss" + placement_region: "uss-1" + placement_zone: "zone" + tserver: + placement_cloud: "dss" + placement_region: "uss-1" + placement_zone: "zone" + + masterAddresses: 
"yb-master-0.yb-masters.default.svc.cluster.local:7100,yb-master-1.yb-masters.default.svc.cluster.local:7100,yb-master-2.yb-masters.default.svc.cluster.local:7100" + + tls: + enabled: true + nodeToNode: true + clientToServer: true + insecure: false + provided: true + # See https://github.com/yugabyte/charts/blob/master/stable/yugabyte/values.yaml diff --git a/deploy/services/tanka/examples/minimum/main.jsonnet b/deploy/services/tanka/examples/minimum/main.jsonnet index 52005bc7b..208887ddf 100644 --- a/deploy/services/tanka/examples/minimum/main.jsonnet +++ b/deploy/services/tanka/examples/minimum/main.jsonnet @@ -13,7 +13,7 @@ local metadata = metadataBase { enableScd: false, // <-- This boolean value is VAR_ENABLE_SCD cockroach+: { image: 'VAR_CRDB_DOCKER_IMAGE_NAME', - hostnameSuffix: 'VAR_CRDB_HOSTNAME_SUFFIX', + hostnameSuffix: 'VAR_DB_HOSTNAME_SUFFIX', locality: 'VAR_CRDB_LOCALITY', nodeIPs: ['VAR_CRDB_NODE_IP1', 'VAR_CRDB_NODE_IP2', 'VAR_CRDB_NODE_IP3'], shouldInit: false, // <-- This boolean value is VAR_SHOULD_INIT